From 14756dc59d1a87ff775fbd1809f9f7be91b8e4c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:04:20 +0000 Subject: [PATCH 001/372] build(deps): bump redis from 3.5.3 to 4.5.5 Bumps [redis](https://github.com/redis/redis-py) from 3.5.3 to 4.5.5. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/3.5.3...v4.5.5) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- poetry.lock | 273 ++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 2 +- 2 files changed, 262 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index d923189ec5..e97391ad90 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "amqp" version = "5.1.1" description = "Low-level AMQP client for Python (fork of amqplib)." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -18,6 +19,7 @@ vine = ">=5.0.0" name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -38,6 +40,7 @@ trio = ["trio (>=0.16,<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "main" optional = false python-versions = "*" files = [ @@ -49,6 +52,7 @@ files = [ name = "argparse" version = "1.4.0" description = "Python command-line parsing library" +category = "main" optional = false python-versions = "*" files = [ @@ -60,6 +64,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -74,6 +79,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -85,6 +91,7 @@ files = [ name = "astroid" version = "2.15.4" description = "An abstract syntax tree for Python with inference support." 
+category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -104,6 +111,7 @@ wrapt = [ name = "asttokens" version = "2.0.8" description = "Annotate AST trees with source code positions" +category = "main" optional = false python-versions = "*" files = [ @@ -121,6 +129,7 @@ test = ["astroid (<=2.5.3)", "pytest"] name = "async-generator" version = "1.10" description = "Async generators and context managers for Python 3.5+" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -128,10 +137,23 @@ files = [ {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"}, ] +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] + [[package]] name = "attrs" version = "20.3.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -149,6 +171,7 @@ tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> name = "autobahn" version = "23.1.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -177,6 +200,7 @@ xbr = ["base58 (>=2.1.0)", "cbor2 (>=5.2.0)", "click (>=8.1.2)", "ecdsa (>=0.16. name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." +category = "main" optional = false python-versions = "*" files = [ @@ -195,6 +219,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "main" optional = false python-versions = "*" files = [ @@ -206,6 +231,7 @@ files = [ name = "beautifulsoup4" version = "4.11.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -224,6 +250,7 @@ lxml = ["lxml"] name = "billiard" version = "4.1.0" description = "Python multiprocessing fork with improvements and bugfixes" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -235,6 +262,7 @@ files = [ name = "black" version = "23.3.0" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -283,6 +311,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.17.43" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -299,6 +328,7 @@ s3transfer = ">=0.3.0,<0.4.0" name = "botocore" version = "1.20.43" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -318,6 +348,7 @@ crt = ["awscrt (==0.10.8)"] name = "celery" version = "5.3.0" description = "Distributed Task Queue." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -373,6 +404,7 @@ zstd = ["zstandard (==0.21.0)"] name = "certifi" version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -384,6 +416,7 @@ files = [ name = "cffi" version = "1.14.5" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -445,6 +478,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -456,6 +490,7 @@ files = [ name = "chardet" version = "5.1.0" description = "Universal encoding detector for Python 3" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -467,6 +502,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -551,6 +587,7 @@ files = [ name = "click" version = "8.1.2" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +602,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.0" description = "Enables git-like *did-you-mean* feature in click" +category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -579,6 +617,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +category = "main" optional = false python-versions = "*" files = [ @@ -596,6 +635,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.2.0" description = "REPL plugin for Click" +category = "main" optional = false python-versions = "*" files = [ @@ -612,6 +652,7 @@ six = "*" name = "climage" version = "0.1.3" description = "Convert images to beautiful ANSI escape codes" +category = "main" optional = false python-versions = ">=3.2" files = [ @@ -627,6 +668,7 @@ Pillow = "*" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -638,6 +680,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -649,6 +692,7 @@ files = [ name = "contextlib2" version = "0.6.0.post1" description = "Backports and enhancements for the contextlib module" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -660,6 +704,7 @@ files = [ name = "coreapi" version = "2.3.3" description = "Python client library for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -677,6 +722,7 @@ uritemplate = "*" name = "coreschema" version = "0.0.4" description = "Core Schema." 
+category = "main" optional = false python-versions = "*" files = [ @@ -691,6 +737,7 @@ jinja2 = "*" name = "courts-db" version = "0.10.9" description = "Database of Courts" +category = "main" optional = false python-versions = "*" files = [ @@ -702,6 +749,7 @@ files = [ name = "cryptography" version = "36.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -742,6 +790,7 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "cssselect" version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -753,6 +802,7 @@ files = [ name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +822,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "datasketch" version = "1.5.7" description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" optional = false python-versions = "*" files = [ @@ -794,6 +845,7 @@ test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy" name = "dateparser" version = "1.1.8" description = "Date parsing library designed to parse dates from HTML pages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -816,6 +868,7 @@ langdetect = ["langdetect"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -827,6 +880,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -838,6 +892,7 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -852,6 +907,7 @@ graph = ["objgraph (>=1.7.2)"] name = "disposable-email-domains" version = "0.0.64" description = "A set of disposable email domains" +category = "main" optional = false python-versions = "*" files = [ @@ -866,6 +922,7 @@ dev = ["check-manifest"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -877,6 +934,7 @@ files = [ name = "django" version = "4.2.1" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -897,6 +955,7 @@ bcrypt = ["bcrypt"] name = "django-admin-cursor-paginator" version = "0.1.2" description = "Drop-in replacement for django admin default pagination that works fast with huge tables." +category = "main" optional = false python-versions = ">=3.4" files = [ @@ -911,6 +970,7 @@ Django = ">=2.0" name = "django-cache-memoize" version = "0.1.8" description = "Django utility for a memoization decorator that uses the Django cache framework." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -925,6 +985,7 @@ dev = ["black", "flake8", "therapist", "tox", "twine"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -939,6 +1000,7 @@ Django = ">=3.2" name = "django-debug-toolbar" version = "4.0.0" description = "A configurable set of panels that display various debug information about the current request/response." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -954,6 +1016,7 @@ sqlparse = ">=0.2" name = "django-elasticsearch-dsl" version = "7.3" description = "Wrapper around elasticsearch-dsl-py for django models" +category = "main" optional = false python-versions = "*" files = [ @@ -969,6 +1032,7 @@ six = "*" name = "django-environ" version = "0.8.1" description = "A package that allows you to utilize 12factor inspired environment variables to configure your Django application." +category = "main" optional = false python-versions = ">=3.4,<4" files = [ @@ -977,14 +1041,15 @@ files = [ ] [package.extras] -develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.dev0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] -docs = ["furo (>=2021.8.17b43,<2021.9.dev0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +docs = ["furo (>=2021.8.17b43,<2021.9.0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] testing = ["coverage[toml] (>=5.0a4)", "pytest (>=4.6.11)"] [[package]] name = "django-extensions" version = "3.2.1" description = "Extensions for Django" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,6 +1064,7 @@ Django = ">=3.2" name = "django-filter" version = "2.4.0" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1013,6 +1079,7 @@ Django = ">=2.2" name = "django-hcaptcha" version = "0.2.0" description = "Django hCaptcha provides a simple way to protect your django forms using hCaptcha" +category = "main" optional = false python-versions = "*" files = [ @@ -1024,6 +1091,7 @@ files = [ name = "django-localflavor" version = "3.1" description = "Country-specific Django helpers" +category = "main" optional = false python-versions = "*" files = [ @@ -1039,6 +1107,7 @@ python-stdnum = ">=1.6" name = "django-markdown-deux" version = "1.0.6" description = "a Django app that provides template tags for using Markdown (using the python-markdown2 processor)" +category = "main" optional = false python-versions = "*" files = [ @@ -1052,6 +1121,7 @@ markdown2 = "*" name = "django-mathfilters" version = "1.0.0" description = "A set of simple math filters for Django" +category = "main" optional = false python-versions = "*" files = [ @@ -1063,6 +1133,7 @@ files = [ name = "django-override-storage" version = "0.3.2" description = "Django test helpers to manage file storage side effects." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1074,6 +1145,7 @@ files = [ name = "django-pghistory" version = "2.7.0" description = "History tracking for Django and Postgres" +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1089,6 +1161,7 @@ django-pgtrigger = ">=4.5.0" name = "django-pgtrigger" version = "4.6.0" description = "Postgres trigger support integrated with Django models." +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1103,6 +1176,7 @@ django = ">=2" name = "django-ratelimit" version = "4.0.0" description = "Cache-based rate-limiting for Django." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1114,6 +1188,7 @@ files = [ name = "django-ses" version = "3.3.0" description = "A Django email backend for Amazon's Simple Email Service" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1136,6 +1211,7 @@ events = ["cryptography (>=36.0.2)", "requests (>=2.27.1)"] name = "django-storages" version = "1.13.2" description = "Support for many storage backends in Django" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1158,6 +1234,7 @@ sftp = ["paramiko (>=1.10.0)"] name = "django-stubs" version = "4.2.0" description = "Mypy stubs for Django" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1181,6 +1258,7 @@ compatible-mypy = ["mypy (>=1.2.0,<1.3)"] name = "django-stubs-ext" version = "4.2.0" description = "Monkey-patching and extensions for django-stubs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1196,6 +1274,7 @@ typing-extensions = "*" name = "django-waffle" version = "3.0.0" description = "A feature flipper for Django." +category = "main" optional = false python-versions = "*" files = [ @@ -1207,6 +1286,7 @@ files = [ name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1222,6 +1302,7 @@ pytz = "*" name = "djangorestframework-filters" version = "1.0.0.dev2" description = "Better filtering for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1237,6 +1318,7 @@ djangorestframework = "*" name = "djangorestframework-stubs" version = "3.14.0" description = "PEP-484 stubs for django-rest-framework" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1261,6 +1343,7 @@ markdown = ["types-Markdown (>=0.1.5)"] name = "djangorestframework-xml" version = "2.0.0" description = "XML support for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1280,6 +1363,7 @@ tests = ["Django (>=1.6)", "djangorestframework (>=2.4.3)", "flake8", "pytest", name = "docopt" version = "0.6.2" description = "Pythonic argument parser, that will make you smile" +category = "main" optional = false python-versions = "*" files = [ @@ -1290,6 +1374,7 @@ files = [ name = "drf-dynamic-fields" version = "0.3.1" description = "Dynamically return subset of Django REST Framework serializer fields" +category = "main" optional = false python-versions = "*" files = [ @@ -1301,6 +1386,7 @@ files = [ name = "elasticsearch" version = "7.17.9" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1322,6 +1408,7 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "elasticsearch-dsl" version = "7.4.0" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1341,6 +1428,7 @@ develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytes name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1355,6 +1443,7 @@ test = ["pytest (>=6)"] name = "executing" version = "1.1.0" description = "Get the currently executing AST node of a frame, and other information" +category = "main" optional = false python-versions = "*" files = [ @@ -1369,6 +1458,7 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "exrex" version = "0.11.0" description = "Irregular methods for regular expressions" +category = "dev" optional = false python-versions = "*" files = [ @@ -1380,6 +1470,7 @@ files = [ name = "eyecite" version = "2.4.0" description = "Tool for extracting legal citations from text strings." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1399,6 +1490,7 @@ reporters-db = ">=3.2.2,<4.0.0" name = "factory-boy" version = "3.2.1" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1417,6 +1509,7 @@ doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] name = "faker" version = "13.3.1" description = "Faker is a Python package that generates fake data for you." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1431,6 +1524,7 @@ python-dateutil = ">=2.4" name = "fast-diff-match-patch" version = "2.0.1" description = "fast_diff_match_patch: Python package wrapping the C++ implementation of google-diff-match-patch" +category = "main" optional = false python-versions = "*" files = [ @@ -1506,6 +1600,7 @@ files = [ name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1520,6 +1615,7 @@ sgmllib3k = "*" name = "filelock" version = "3.9.0" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1631,7 @@ testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pyt name = "flake8" version = "6.0.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -1551,6 +1648,7 @@ pyflakes = ">=3.0.0,<3.1.0" name = "flynt" version = "0.78" description = "CLI tool to convert a python project's %-formatted strings to f-strings." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1569,6 +1667,7 @@ dev = ["build", "pre-commit", "pytest", "pytest-cov", "twine"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1579,6 +1678,7 @@ files = [ name = "fuzzywuzzy" version = "0.18.0" description = "Fuzzy string matching in python" +category = "main" optional = false python-versions = "*" files = [ @@ -1593,6 +1693,7 @@ speedup = ["python-levenshtein (>=0.12)"] name = "geonamescache" version = "1.6.0" description = "Geonames data for continents, cities and US states." +category = "main" optional = false python-versions = "*" files = [ @@ -1604,6 +1705,7 @@ files = [ name = "gunicorn" version = "20.1.0" description = "WSGI HTTP Server for UNIX" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1624,6 +1726,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.13.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1635,6 +1738,7 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1656,6 +1760,7 @@ lxml = ["lxml"] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1670,6 +1775,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httptools" version = "0.5.0" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -1723,6 +1829,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." 
+category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1737,6 +1844,7 @@ idna = ">=2.5" name = "identify" version = "2.5.17" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1751,6 +1859,7 @@ license = ["ukkonen"] name = "idna" version = "2.10" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1762,6 +1871,7 @@ files = [ name = "igraph" version = "0.10.4" description = "High performance graph data structures and algorithms" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1828,6 +1938,7 @@ test-musl = ["networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"] name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1843,6 +1954,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1854,6 +1966,7 @@ files = [ name = "internetarchive" version = "3.3.0" description = "A Python interface to archive.org." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1879,6 +1992,7 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7. name = "ipaddress" version = "1.0.23" description = "IPv4/IPv6 manipulation library" +category = "main" optional = false python-versions = "*" files = [ @@ -1890,6 +2004,7 @@ files = [ name = "ipython" version = "8.10.0" description = "IPython: Productive Interactive Computing" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1928,6 +2043,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isort" version = "5.8.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1944,6 +2060,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itypes" version = "1.2.0" description = "Simple immutable types for python." +category = "main" optional = false python-versions = "*" files = [ @@ -1955,6 +2072,7 @@ files = [ name = "jedi" version = "0.18.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1973,6 +2091,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "2.11.3" description = "A very fast and expressive template engine." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1990,6 +2109,7 @@ i18n = ["Babel (>=0.8)"] name = "jmespath" version = "0.10.0" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2001,6 +2121,7 @@ files = [ name = "jsonpatch" version = "1.32" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2015,6 +2136,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.1" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2026,6 +2148,7 @@ files = [ name = "judge-pics" version = "2.0.2" description = "Database of Judge Pictures" +category = "main" optional = false python-versions = "*" files = [ @@ -2045,6 +2168,7 @@ requests = ">=2.0,<3.0" name = "juriscraper" version = "2.5.49" description = "An API to scrape American court websites for metadata." +category = "main" optional = false python-versions = "*" files = [ @@ -2072,6 +2196,7 @@ tldextract = "*" name = "kdtree" version = "0.16" description = "A Python implemntation of a kd-tree" +category = "main" optional = false python-versions = "*" files = [ @@ -2083,6 +2208,7 @@ files = [ name = "kombu" version = "5.3.0" description = "Messaging library for Python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2241,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "lazy-object-proxy" version = "1.6.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2146,6 +2273,7 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2231,6 +2359,7 @@ source = ["Cython (>=0.29.7)"] name = "lxml-stubs" version = "0.4.0" description = "Type annotations for the lxml package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2245,6 +2374,7 @@ test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1. name = "markdown2" version = "2.4.0" description = "A fast and complete Python implementation of Markdown" +category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -2256,6 +2386,7 @@ files = [ name = "markupsafe" version = "1.1.1" description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" files = [ @@ -2317,6 +2448,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2331,6 +2463,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2342,6 +2475,7 @@ files = [ name = "mypy" version = "1.2.0" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2388,6 +2522,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2399,6 +2534,7 @@ files = [ name = "nameparser" version = "1.1.1" description = "A simple Python module for parsing human names into their individual components." +category = "main" optional = false python-versions = "*" files = [ @@ -2410,6 +2546,7 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2425,8 +2562,9 @@ icu = ["PyICU (>=1.0.0)"] name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" +category = "main" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -2441,6 +2579,7 @@ PyOpenSSL = "*" name = "networkx" version = "3.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2459,6 +2598,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2473,6 +2613,7 @@ setuptools = "*" name = "nose" version = "1.3.7" description = "nose extends unittest to make testing easier" +category = "main" optional = false python-versions = "*" files = [ @@ -2485,6 +2626,7 @@ files = [ name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2522,6 +2664,7 @@ files = [ name = "openapi-codec" version = "1.3.2" description = "An OpenAPI codec for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -2535,6 +2678,7 @@ coreapi = ">=2.2.0" name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2549,6 +2693,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2560,6 +2705,7 @@ files = [ name = "pandas" version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2604,6 +2750,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2619,6 +2766,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2630,6 +2778,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." +category = "main" optional = false python-versions = "*" files = [ @@ -2644,6 +2793,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "main" optional = false python-versions = "*" files = [ @@ -2655,6 +2805,7 @@ files = [ name = "pillow" version = "9.3.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2729,6 +2880,7 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "2.5.1" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2744,6 +2896,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "0.13.1" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2758,6 +2911,7 @@ dev = ["pre-commit", "tox"] name = "pre-commit" version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2776,6 +2930,7 @@ virtualenv = ">=20.10.0" name = "probableparsing" version = "0.0.1" description = "Common methods for propbable parsers" +category = "main" optional = false python-versions = "*" files = [ @@ -2787,6 +2942,7 @@ files = [ name = "prompt-toolkit" version = "3.0.31" description = "Library for building powerful interactive command lines in Python" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -2801,6 +2957,7 @@ wcwidth = "*" name = "psycopg2" version = "2.9.5" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2823,6 +2980,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -2834,6 +2992,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "main" optional = false python-versions = "*" files = [ @@ -2848,6 +3007,7 @@ tests = ["pytest"] name = "pyahocorasick" version = "1.4.2" description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." +category = "main" optional = false python-versions = "*" files = [ @@ -2858,6 +3018,7 @@ files = [ name = "pyasn1" version = "0.4.8" description = "ASN.1 types and codecs" +category = "main" optional = false python-versions = "*" files = [ @@ -2869,6 +3030,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2883,6 +3045,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.10.0" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2894,6 +3057,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2905,6 +3069,7 @@ files = [ name = "pyflakes" version = "3.0.1" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2916,6 +3081,7 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2930,6 +3096,7 @@ plugins = ["importlib-metadata"] name = "pylint" version = "2.17.3" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -2958,6 +3125,7 @@ testutils = ["gitpython (>3)"] name = "pyopenssl" version = "20.0.1" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -2977,6 +3145,7 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "2.4.7" description = "Python parsing module" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2988,6 +3157,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3000,6 +3170,7 @@ files = [ name = "pystemmer" version = "2.0.1" description = "Snowball stemming algorithms, for information retrieval" +category = "main" optional = false python-versions = "*" files = [ @@ -3010,6 +3181,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3032,6 +3204,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3050,6 +3223,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "python-crfsuite" version = "0.9.9" description = "Python binding for CRFsuite" +category = "main" optional = false python-versions = "*" files = [ @@ -3100,6 +3274,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3114,6 +3289,7 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3128,6 +3304,7 @@ cli = ["click (>=5.0)"] name = "python-levenshtein" version = "0.12.2" description = "Python extension for computing string edit distances and similarities." 
+category = "main" optional = false python-versions = "*" files = [ @@ -3141,6 +3318,7 @@ setuptools = "*" name = "python-magic" version = "0.4.22" description = "File type identification using libmagic" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3152,6 +3330,7 @@ files = [ name = "python-stdnum" version = "1.16" description = "Python module to handle standardized numbers and codes" +category = "main" optional = false python-versions = "*" files = [ @@ -3168,6 +3347,7 @@ soap-fallback = ["PySimpleSOAP"] name = "pytz" version = "2021.1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -3179,6 +3359,7 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3193,6 +3374,7 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "5.4.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3229,22 +3411,28 @@ files = [ [[package]] name = "redis" -version = "3.5.3" -description = "Python client for Redis key-value store" +version = "4.5.5" +description = "Python client for Redis database and key-value store" +category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.7" files = [ - {file = "redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"}, - {file = "redis-3.5.3.tar.gz", hash = "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2"}, + {file = "redis-4.5.5-py3-none-any.whl", hash = "sha256:77929bc7f5dab9adf3acba2d3bb7d7658f1e0c2f1cafe7eb36434e751c471119"}, + {file = "redis-4.5.5.tar.gz", hash = "sha256:dc87a0bdef6c8bfe1ef1e1c40be7034390c2ae02d92dcd0c7ca1729443899880"}, ] +[package.dependencies] +async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} + [package.extras] -hiredis = ["hiredis (>=0.1.3)"] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] [[package]] name = "regex" version = "2022.1.18" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = "*" files = [ @@ -3328,6 +3516,7 @@ files = [ name = "reporters-db" version = "3.2.36" description = "Database of Court Reporters" +category = "main" optional = false python-versions = "*" files = [ @@ -3342,6 +3531,7 @@ six = ">=1.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3363,6 +3553,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-file" version = "1.5.1" description = "File transport adapter for Requests" +category = "main" optional = false python-versions = "*" files = [ @@ -3378,6 +3569,7 @@ six = "*" name = "s3transfer" version = "0.3.6" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = "*" files = [ @@ -3392,6 +3584,7 @@ botocore = ">=1.12.36,<2.0a.0" name = "schema" version = "0.7.4" description = "Simple data validation library" +category = "main" optional = false python-versions = "*" files = [ @@ -3406,6 +3599,7 @@ contextlib2 = ">=0.5.5" name = "scipy" version = "1.10.1" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = "<3.12,>=3.8" files = [ @@ -3444,6 +3638,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "scorched" version = "0.13.1.dev0" description = "" +category = "main" optional = false python-versions = "*" files = [] @@ -3466,6 +3661,7 @@ resolved_reference = "0632024e72e22a71e17cdb778805561f7cdd33d8" name = "selenium" version = "4.9.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3483,6 +3679,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "sentry-sdk" version = "1.14.0" description = "Python client for Sentry (https://sentry.io)" +category = "main" optional = false python-versions = "*" files = [ @@ -3521,6 +3718,7 @@ tornado = ["tornado (>=5)"] name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -3545,6 +3743,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "65.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3561,6 +3760,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" files = [ @@ -3571,6 +3771,7 @@ files = [ name = "simplejson" version = "3.18.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3653,6 +3854,7 @@ files = [ name = "six" version = "1.15.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3664,6 +3866,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3675,6 +3878,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -3686,6 +3890,7 @@ files = [ name = "soupsieve" version = "2.2.1" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3697,6 +3902,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3713,6 +3919,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.5.1" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "main" optional = false python-versions = "*" files = [ @@ -3732,6 +3939,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "stripe" version = "5.2.0" description = "Python bindings for the Stripe API" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3746,6 +3954,7 @@ requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} name = "tblib" version = "1.7.0" description = "Traceback serialization library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3757,6 +3966,7 @@ files = [ name = "texttable" version = "1.6.4" description = "module for creating simple ASCII tables" +category = "main" optional = false python-versions = "*" files = [ @@ -3768,6 +3978,7 @@ files = [ name = "time-machine" version = "2.9.0" description = "Travel through time in your tests." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3833,6 +4044,7 @@ python-dateutil = "*" name = "timeout-decorator" version = "0.5.0" description = "Timeout decorator" +category = "main" optional = false python-versions = "*" files = [ @@ -3843,6 +4055,7 @@ files = [ name = "tldextract" version = "3.4.0" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3860,6 +4073,7 @@ requests-file = ">=1.4" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4085,7 @@ files = [ name = "tomlkit" version = "0.11.8" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3882,6 +4097,7 @@ files = [ name = "tqdm" version = "4.59.0" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3898,6 +4114,7 @@ telegram = ["requests"] name = "traitlets" version = "5.4.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3912,6 +4129,7 @@ test = ["pre-commit", "pytest"] name = "trio" version = "0.21.0" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3932,6 +4150,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.9.2" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3948,6 +4167,7 @@ wsproto = ">=0.14" name = "twisted" version = "22.4.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.6.7" files = [ @@ -3988,6 +4208,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." +category = "main" optional = false python-versions = "*" files = [ @@ -4013,6 +4234,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4029,6 +4251,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "types-dateparser" version = "1.1.4.6" description = "Typing stubs for dateparser" +category = "main" optional = false python-versions = "*" files = [ @@ -4040,6 +4263,7 @@ files = [ name = "types-pyopenssl" version = "23.0.0.4" description = "Typing stubs for pyOpenSSL" +category = "dev" optional = false python-versions = "*" files = [ @@ -4054,6 +4278,7 @@ cryptography = ">=35.0.0" name = "types-python-dateutil" version = "2.8.19.12" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4065,6 +4290,7 @@ files = [ name = "types-pytz" version = "2021.3.5" description = "Typing stubs for pytz" +category = "dev" optional = false python-versions = "*" files = [ @@ -4076,6 +4302,7 @@ files = [ name = "types-pyyaml" version = "6.0.4" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -4087,6 +4314,7 @@ files = [ name = "types-redis" version = "4.5.4.1" description = "Typing stubs for redis" +category = "dev" optional = false python-versions = "*" files = [ @@ -4102,6 +4330,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.29.0.0" description = "Typing stubs for requests" +category = "dev" optional = false python-versions = "*" files = [ @@ -4116,6 +4345,7 @@ types-urllib3 = "<1.27" name = "types-simplejson" version = "3.19.0.0" description = "Typing stubs for simplejson" +category = "dev" optional = false 
python-versions = "*" files = [ @@ -4127,6 +4357,7 @@ files = [ name = "types-urllib3" version = "1.26.11" description = "Typing stubs for urllib3" +category = "dev" optional = false python-versions = "*" files = [ @@ -4138,6 +4369,7 @@ files = [ name = "typing-extensions" version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4149,6 +4381,7 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -4160,6 +4393,7 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4179,6 +4413,7 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "unidecode" version = "1.2.0" description = "ASCII transliterations of Unicode text" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4190,6 +4425,7 @@ files = [ name = "uritemplate" version = "3.0.1" description = "URI templates" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4201,6 +4437,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4220,6 +4457,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "usaddress" version = "0.5.10" description = "Parse US addresses using conditional random fields" +category = "main" optional = false python-versions = "*" files = [ @@ -4236,6 +4474,7 @@ python-crfsuite = ">=0.7" name = "uvicorn" version = "0.22.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4250,7 +4489,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -4261,6 +4500,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.17.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4305,6 +4545,7 @@ test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "my name = "vine" version = "5.0.0" description = "Promises, promises, promises." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4316,6 +4557,7 @@ files = [ name = "virtualenv" version = "20.17.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4336,6 +4578,7 @@ testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7 name = "watchfiles" version = "0.19.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4370,6 +4613,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.5" description = "Measures the displayed width of unicode strings in a terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -4381,6 +4625,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "main" optional = false python-versions = "*" files = [ @@ -4392,6 +4637,7 @@ files = [ name = "websockets" version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4471,6 +4717,7 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -4555,6 +4802,7 @@ files = [ name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4569,6 +4817,7 @@ h11 = ">=0.9.0,<1" name = "zope-interface" version = "6.0" description = "Interfaces for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4615,4 +4864,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "d435d4c7dce4af1c659d4dbe2d712e2091a1514a80439b4e4009404f69bef595" +content-hash = "31793909108232d01e0af0d04665aed89ecf132877c5d9b7bfa6b6be06b18dd1" diff --git a/pyproject.toml b/pyproject.toml index 5ec87e6e65..462d55a9a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ python-dateutil = "^2.8.1" python-magic = "^0.4.21" pytz = "*" pyyaml = "^5.3.1" -redis = "^3.5.3" +redis = "^4.5.5" requests = "^2.31.0" simplejson = "^3.18.3" stripe = "^5.2.0" From 228d4317a8462f5914f9ceb69b76aa56269eba98 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:04:51 +0000 Subject: [PATCH 002/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e531956054..e9052f9eaf 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This repository is organized in the following way: - scripts: logrotate, systemd, etc, and init scripts for our various configurations and daemons. -## Getting Involved +## Getting Involved If you want to get involved send us an email with your contact info or take a look through the [issues list][issues]. 
There are innumerable things we need help with, but we especially are looking for help with: From 0f6cbf528513b4dbe6d3ebb8f2a0f95720718973 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:07:57 +0000 Subject: [PATCH 003/372] build(deps): bump drf-dynamic-fields from 0.3.1 to 0.4.0 Bumps [drf-dynamic-fields](https://github.com/dbrgn/drf-dynamic-fields) from 0.3.1 to 0.4.0. - [Changelog](https://github.com/dbrgn/drf-dynamic-fields/blob/master/CHANGELOG.md) - [Commits](https://github.com/dbrgn/drf-dynamic-fields/compare/v0.3.1...v0.4.0) --- updated-dependencies: - dependency-name: drf-dynamic-fields dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 249 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 241 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index d923189ec5..5cbfed0e6a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "amqp" version = "5.1.1" description = "Low-level AMQP client for Python (fork of amqplib)." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -18,6 +19,7 @@ vine = ">=5.0.0" name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -38,6 +40,7 @@ trio = ["trio (>=0.16,<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "main" optional = false python-versions = "*" files = [ @@ -49,6 +52,7 @@ files = [ name = "argparse" version = "1.4.0" description = "Python command-line parsing library" +category = "main" optional = false python-versions = "*" files = [ @@ -60,6 +64,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -74,6 +79,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -85,6 +91,7 @@ files = [ name = "astroid" version = "2.15.4" description = "An abstract syntax tree for Python with inference support." 
+category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -104,6 +111,7 @@ wrapt = [ name = "asttokens" version = "2.0.8" description = "Annotate AST trees with source code positions" +category = "main" optional = false python-versions = "*" files = [ @@ -121,6 +129,7 @@ test = ["astroid (<=2.5.3)", "pytest"] name = "async-generator" version = "1.10" description = "Async generators and context managers for Python 3.5+" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -132,6 +141,7 @@ files = [ name = "attrs" version = "20.3.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -149,6 +159,7 @@ tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> name = "autobahn" version = "23.1.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -177,6 +188,7 @@ xbr = ["base58 (>=2.1.0)", "cbor2 (>=5.2.0)", "click (>=8.1.2)", "ecdsa (>=0.16. name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." +category = "main" optional = false python-versions = "*" files = [ @@ -195,6 +207,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "main" optional = false python-versions = "*" files = [ @@ -206,6 +219,7 @@ files = [ name = "beautifulsoup4" version = "4.11.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -224,6 +238,7 @@ lxml = ["lxml"] name = "billiard" version = "4.1.0" description = "Python multiprocessing fork with improvements and bugfixes" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -235,6 +250,7 @@ files = [ name = "black" version = "23.3.0" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -283,6 +299,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.17.43" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -299,6 +316,7 @@ s3transfer = ">=0.3.0,<0.4.0" name = "botocore" version = "1.20.43" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -318,6 +336,7 @@ crt = ["awscrt (==0.10.8)"] name = "celery" version = "5.3.0" description = "Distributed Task Queue." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -373,6 +392,7 @@ zstd = ["zstandard (==0.21.0)"] name = "certifi" version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -384,6 +404,7 @@ files = [ name = "cffi" version = "1.14.5" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -445,6 +466,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." 
+category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -456,6 +478,7 @@ files = [ name = "chardet" version = "5.1.0" description = "Universal encoding detector for Python 3" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -467,6 +490,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -551,6 +575,7 @@ files = [ name = "click" version = "8.1.2" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +590,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.0" description = "Enables git-like *did-you-mean* feature in click" +category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -579,6 +605,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +category = "main" optional = false python-versions = "*" files = [ @@ -596,6 +623,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.2.0" description = "REPL plugin for Click" +category = "main" optional = false python-versions = "*" files = [ @@ -612,6 +640,7 @@ six = "*" name = "climage" version = "0.1.3" description = "Convert images to beautiful ANSI escape codes" +category = "main" optional = false python-versions = ">=3.2" files = [ @@ -627,6 +656,7 @@ Pillow = "*" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -638,6 +668,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -649,6 +680,7 @@ files = [ name = "contextlib2" version = "0.6.0.post1" description = "Backports and enhancements for the contextlib module" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -660,6 +692,7 @@ files = [ name = "coreapi" version = "2.3.3" description = "Python client library for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -677,6 +710,7 @@ uritemplate = "*" name = "coreschema" version = "0.0.4" description = "Core Schema." +category = "main" optional = false python-versions = "*" files = [ @@ -691,6 +725,7 @@ jinja2 = "*" name = "courts-db" version = "0.10.9" description = "Database of Courts" +category = "main" optional = false python-versions = "*" files = [ @@ -702,6 +737,7 @@ files = [ name = "cryptography" version = "36.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -742,6 +778,7 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "cssselect" version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -753,6 +790,7 @@ files = [ name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +810,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "datasketch" version = "1.5.7" description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" optional = false python-versions = "*" files = [ @@ -794,6 +833,7 @@ test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy" name = "dateparser" version = "1.1.8" description = "Date parsing library designed to parse dates from HTML pages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -816,6 +856,7 @@ langdetect = ["langdetect"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -827,6 +868,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -838,6 +880,7 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -852,6 +895,7 @@ graph = ["objgraph (>=1.7.2)"] name = "disposable-email-domains" version = "0.0.64" description = "A set of disposable email domains" +category = "main" optional = false python-versions = "*" files = [ @@ -866,6 +910,7 @@ dev = ["check-manifest"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -877,6 +922,7 @@ files = [ name = "django" version = "4.2.1" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -897,6 +943,7 @@ bcrypt = ["bcrypt"] name = "django-admin-cursor-paginator" version = "0.1.2" description = "Drop-in replacement for django admin default pagination that works fast with huge tables." +category = "main" optional = false python-versions = ">=3.4" files = [ @@ -911,6 +958,7 @@ Django = ">=2.0" name = "django-cache-memoize" version = "0.1.8" description = "Django utility for a memoization decorator that uses the Django cache framework." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -925,6 +973,7 @@ dev = ["black", "flake8", "therapist", "tox", "twine"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -939,6 +988,7 @@ Django = ">=3.2" name = "django-debug-toolbar" version = "4.0.0" description = "A configurable set of panels that display various debug information about the current request/response." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -954,6 +1004,7 @@ sqlparse = ">=0.2" name = "django-elasticsearch-dsl" version = "7.3" description = "Wrapper around elasticsearch-dsl-py for django models" +category = "main" optional = false python-versions = "*" files = [ @@ -969,6 +1020,7 @@ six = "*" name = "django-environ" version = "0.8.1" description = "A package that allows you to utilize 12factor inspired environment variables to configure your Django application." +category = "main" optional = false python-versions = ">=3.4,<4" files = [ @@ -977,14 +1029,15 @@ files = [ ] [package.extras] -develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.dev0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] -docs = ["furo (>=2021.8.17b43,<2021.9.dev0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +docs = ["furo (>=2021.8.17b43,<2021.9.0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] testing = ["coverage[toml] (>=5.0a4)", "pytest (>=4.6.11)"] [[package]] name = "django-extensions" version = "3.2.1" description = "Extensions for Django" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,6 +1052,7 @@ Django = ">=3.2" name = "django-filter" version = "2.4.0" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1013,6 +1067,7 @@ Django = ">=2.2" name = "django-hcaptcha" version = "0.2.0" description = "Django hCaptcha provides a simple way to protect your django forms using hCaptcha" +category = "main" optional = false python-versions = "*" files = [ @@ -1024,6 +1079,7 @@ files = [ name = "django-localflavor" version = "3.1" description = "Country-specific Django helpers" +category = "main" optional = false python-versions = "*" files = [ @@ -1039,6 +1095,7 @@ python-stdnum = ">=1.6" name = "django-markdown-deux" version = "1.0.6" description = "a Django app that provides template tags for using Markdown (using the python-markdown2 processor)" +category = "main" optional = false python-versions = "*" files = [ @@ -1052,6 +1109,7 @@ markdown2 = "*" name = "django-mathfilters" version = "1.0.0" description = "A set of simple math filters for Django" +category = "main" optional = false python-versions = "*" files = [ @@ -1063,6 +1121,7 @@ files = [ name = "django-override-storage" version = "0.3.2" description = "Django test helpers to manage file storage side effects." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1074,6 +1133,7 @@ files = [ name = "django-pghistory" version = "2.7.0" description = "History tracking for Django and Postgres" +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1089,6 +1149,7 @@ django-pgtrigger = ">=4.5.0" name = "django-pgtrigger" version = "4.6.0" description = "Postgres trigger support integrated with Django models." +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1103,6 +1164,7 @@ django = ">=2" name = "django-ratelimit" version = "4.0.0" description = "Cache-based rate-limiting for Django." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1114,6 +1176,7 @@ files = [ name = "django-ses" version = "3.3.0" description = "A Django email backend for Amazon's Simple Email Service" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1136,6 +1199,7 @@ events = ["cryptography (>=36.0.2)", "requests (>=2.27.1)"] name = "django-storages" version = "1.13.2" description = "Support for many storage backends in Django" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1158,6 +1222,7 @@ sftp = ["paramiko (>=1.10.0)"] name = "django-stubs" version = "4.2.0" description = "Mypy stubs for Django" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1181,6 +1246,7 @@ compatible-mypy = ["mypy (>=1.2.0,<1.3)"] name = "django-stubs-ext" version = "4.2.0" description = "Monkey-patching and extensions for django-stubs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1196,6 +1262,7 @@ typing-extensions = "*" name = "django-waffle" version = "3.0.0" description = "A feature flipper for Django." +category = "main" optional = false python-versions = "*" files = [ @@ -1207,6 +1274,7 @@ files = [ name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1222,6 +1290,7 @@ pytz = "*" name = "djangorestframework-filters" version = "1.0.0.dev2" description = "Better filtering for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1237,6 +1306,7 @@ djangorestframework = "*" name = "djangorestframework-stubs" version = "3.14.0" description = "PEP-484 stubs for django-rest-framework" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1261,6 +1331,7 @@ markdown = ["types-Markdown (>=0.1.5)"] name = "djangorestframework-xml" version = "2.0.0" description = "XML support for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1280,6 +1351,7 @@ tests = ["Django (>=1.6)", "djangorestframework (>=2.4.3)", "flake8", "pytest", name = "docopt" version = "0.6.2" description = "Pythonic argument parser, that will make you smile" +category = "main" optional = false python-versions = "*" files = [ @@ -1288,19 +1360,21 @@ files = [ [[package]] name = "drf-dynamic-fields" -version = "0.3.1" +version = "0.4.0" description = "Dynamically return subset of Django REST Framework serializer fields" +category = "main" optional = false python-versions = "*" files = [ - {file = "drf_dynamic_fields-0.3.1-py2.py3-none-any.whl", hash = "sha256:fa5a7ea010476184d776b4b977d57d0090e651e8f897d83ed0c2f2bca9cbf704"}, - {file = "drf_dynamic_fields-0.3.1.tar.gz", hash = "sha256:de75969abff74332f339d082931f1815dc91c2ff1ed6e741bd33d1d5057dceb1"}, + {file = "drf_dynamic_fields-0.4.0-py2.py3-none-any.whl", hash = "sha256:48b879fe899905bc18593a61bca43e3b595dc3431b3b4ee499a9fd6c9a53f98c"}, + {file = "drf_dynamic_fields-0.4.0.tar.gz", hash = "sha256:f20a5ec27d003db7595c9315db22217493dcaed575f3811d3e12f264c791c20c"}, ] [[package]] name = "elasticsearch" version = "7.17.9" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1322,6 +1396,7 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "elasticsearch-dsl" version = "7.4.0" description = "Python client for Elasticsearch" +category = "main" 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1341,6 +1416,7 @@ develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytes name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1355,6 +1431,7 @@ test = ["pytest (>=6)"] name = "executing" version = "1.1.0" description = "Get the currently executing AST node of a frame, and other information" +category = "main" optional = false python-versions = "*" files = [ @@ -1369,6 +1446,7 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "exrex" version = "0.11.0" description = "Irregular methods for regular expressions" +category = "dev" optional = false python-versions = "*" files = [ @@ -1380,6 +1458,7 @@ files = [ name = "eyecite" version = "2.4.0" description = "Tool for extracting legal citations from text strings." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1399,6 +1478,7 @@ reporters-db = ">=3.2.2,<4.0.0" name = "factory-boy" version = "3.2.1" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1417,6 +1497,7 @@ doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] name = "faker" version = "13.3.1" description = "Faker is a Python package that generates fake data for you." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1431,6 +1512,7 @@ python-dateutil = ">=2.4" name = "fast-diff-match-patch" version = "2.0.1" description = "fast_diff_match_patch: Python package wrapping the C++ implementation of google-diff-match-patch" +category = "main" optional = false python-versions = "*" files = [ @@ -1506,6 +1588,7 @@ files = [ name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1520,6 +1603,7 @@ sgmllib3k = "*" name = "filelock" version = "3.9.0" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1619,7 @@ testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pyt name = "flake8" version = "6.0.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -1551,6 +1636,7 @@ pyflakes = ">=3.0.0,<3.1.0" name = "flynt" version = "0.78" description = "CLI tool to convert a python project's %-formatted strings to f-strings." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1569,6 +1655,7 @@ dev = ["build", "pre-commit", "pytest", "pytest-cov", "twine"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1579,6 +1666,7 @@ files = [ name = "fuzzywuzzy" version = "0.18.0" description = "Fuzzy string matching in python" +category = "main" optional = false python-versions = "*" files = [ @@ -1593,6 +1681,7 @@ speedup = ["python-levenshtein (>=0.12)"] name = "geonamescache" version = "1.6.0" description = "Geonames data for continents, cities and US states." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1604,6 +1693,7 @@ files = [ name = "gunicorn" version = "20.1.0" description = "WSGI HTTP Server for UNIX" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1624,6 +1714,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.13.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1635,6 +1726,7 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1656,6 +1748,7 @@ lxml = ["lxml"] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1670,6 +1763,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httptools" version = "0.5.0" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -1723,6 +1817,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1737,6 +1832,7 @@ idna = ">=2.5" name = "identify" version = "2.5.17" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1751,6 +1847,7 @@ license = ["ukkonen"] name = "idna" version = "2.10" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1762,6 +1859,7 @@ files = [ name = "igraph" version = "0.10.4" description = "High performance graph data structures and algorithms" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1828,6 +1926,7 @@ test-musl = ["networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"] name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1843,6 +1942,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1854,6 +1954,7 @@ files = [ name = "internetarchive" version = "3.3.0" description = "A Python interface to archive.org." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1879,6 +1980,7 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7. 
name = "ipaddress" version = "1.0.23" description = "IPv4/IPv6 manipulation library" +category = "main" optional = false python-versions = "*" files = [ @@ -1890,6 +1992,7 @@ files = [ name = "ipython" version = "8.10.0" description = "IPython: Productive Interactive Computing" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1928,6 +2031,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isort" version = "5.8.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1944,6 +2048,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itypes" version = "1.2.0" description = "Simple immutable types for python." +category = "main" optional = false python-versions = "*" files = [ @@ -1955,6 +2060,7 @@ files = [ name = "jedi" version = "0.18.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1973,6 +2079,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "2.11.3" description = "A very fast and expressive template engine." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1990,6 +2097,7 @@ i18n = ["Babel (>=0.8)"] name = "jmespath" version = "0.10.0" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2001,6 +2109,7 @@ files = [ name = "jsonpatch" version = "1.32" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2015,6 +2124,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.1" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2026,6 +2136,7 @@ files = [ name = "judge-pics" version = "2.0.2" description = "Database of Judge Pictures" +category = "main" optional = false python-versions = "*" files = [ @@ -2045,6 +2156,7 @@ requests = ">=2.0,<3.0" name = "juriscraper" version = "2.5.49" description = "An API to scrape American court websites for metadata." +category = "main" optional = false python-versions = "*" files = [ @@ -2072,6 +2184,7 @@ tldextract = "*" name = "kdtree" version = "0.16" description = "A Python implemntation of a kd-tree" +category = "main" optional = false python-versions = "*" files = [ @@ -2083,6 +2196,7 @@ files = [ name = "kombu" version = "5.3.0" description = "Messaging library for Python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2229,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "lazy-object-proxy" version = "1.6.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2146,6 +2261,7 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2231,6 +2347,7 @@ source = ["Cython (>=0.29.7)"] name = "lxml-stubs" version = "0.4.0" description = "Type annotations for the lxml package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2245,6 +2362,7 @@ test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1. name = "markdown2" version = "2.4.0" description = "A fast and complete Python implementation of Markdown" +category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -2256,6 +2374,7 @@ files = [ name = "markupsafe" version = "1.1.1" description = "Safely add untrusted strings to HTML/XML markup." +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" files = [ @@ -2317,6 +2436,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2331,6 +2451,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2342,6 +2463,7 @@ files = [ name = "mypy" version = "1.2.0" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2388,6 +2510,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2399,6 +2522,7 @@ files = [ name = "nameparser" version = "1.1.1" description = "A simple Python module for parsing human names into their individual components." +category = "main" optional = false python-versions = "*" files = [ @@ -2410,6 +2534,7 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2425,8 +2550,9 @@ icu = ["PyICU (>=1.0.0)"] name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" +category = "main" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -2441,6 +2567,7 @@ PyOpenSSL = "*" name = "networkx" version = "3.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2459,6 +2586,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2473,6 +2601,7 @@ setuptools = "*" name = "nose" version = "1.3.7" description = "nose extends unittest to make testing easier" +category = "main" optional = false python-versions = "*" files = [ @@ -2485,6 +2614,7 @@ files = [ name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2522,6 +2652,7 @@ files = [ name = "openapi-codec" version = "1.3.2" description = "An OpenAPI codec for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -2535,6 +2666,7 @@ coreapi = ">=2.2.0" name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2549,6 +2681,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2560,6 +2693,7 @@ files = [ name = "pandas" version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2604,6 +2738,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2619,6 +2754,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2630,6 +2766,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." 
+category = "main" optional = false python-versions = "*" files = [ @@ -2644,6 +2781,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "main" optional = false python-versions = "*" files = [ @@ -2655,6 +2793,7 @@ files = [ name = "pillow" version = "9.3.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2729,6 +2868,7 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "2.5.1" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2744,6 +2884,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "0.13.1" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2758,6 +2899,7 @@ dev = ["pre-commit", "tox"] name = "pre-commit" version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2776,6 +2918,7 @@ virtualenv = ">=20.10.0" name = "probableparsing" version = "0.0.1" description = "Common methods for propbable parsers" +category = "main" optional = false python-versions = "*" files = [ @@ -2787,6 +2930,7 @@ files = [ name = "prompt-toolkit" version = "3.0.31" description = "Library for building powerful interactive command lines in Python" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -2801,6 +2945,7 @@ wcwidth = "*" name = "psycopg2" version = "2.9.5" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2823,6 +2968,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -2834,6 +2980,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "main" optional = false python-versions = "*" files = [ @@ -2848,6 +2995,7 @@ tests = ["pytest"] name = "pyahocorasick" version = "1.4.2" description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." 
+category = "main" optional = false python-versions = "*" files = [ @@ -2858,6 +3006,7 @@ files = [ name = "pyasn1" version = "0.4.8" description = "ASN.1 types and codecs" +category = "main" optional = false python-versions = "*" files = [ @@ -2869,6 +3018,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2883,6 +3033,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.10.0" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2894,6 +3045,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2905,6 +3057,7 @@ files = [ name = "pyflakes" version = "3.0.1" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2916,6 +3069,7 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2930,6 +3084,7 @@ plugins = ["importlib-metadata"] name = "pylint" version = "2.17.3" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -2958,6 +3113,7 @@ testutils = ["gitpython (>3)"] name = "pyopenssl" version = "20.0.1" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -2977,6 +3133,7 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "2.4.7" description = "Python parsing module" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2988,6 +3145,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3000,6 +3158,7 @@ files = [ name = "pystemmer" version = "2.0.1" description = "Snowball stemming algorithms, for information retrieval" +category = "main" optional = false python-versions = "*" files = [ @@ -3010,6 +3169,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3032,6 +3192,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." 
+category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3050,6 +3211,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "python-crfsuite" version = "0.9.9" description = "Python binding for CRFsuite" +category = "main" optional = false python-versions = "*" files = [ @@ -3100,6 +3262,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3114,6 +3277,7 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3128,6 +3292,7 @@ cli = ["click (>=5.0)"] name = "python-levenshtein" version = "0.12.2" description = "Python extension for computing string edit distances and similarities." +category = "main" optional = false python-versions = "*" files = [ @@ -3141,6 +3306,7 @@ setuptools = "*" name = "python-magic" version = "0.4.22" description = "File type identification using libmagic" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3152,6 +3318,7 @@ files = [ name = "python-stdnum" version = "1.16" description = "Python module to handle standardized numbers and codes" +category = "main" optional = false python-versions = "*" files = [ @@ -3168,6 +3335,7 @@ soap-fallback = ["PySimpleSOAP"] name = "pytz" version = "2021.1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -3179,6 +3347,7 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3193,6 +3362,7 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "5.4.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3231,6 +3401,7 @@ files = [ name = "redis" version = "3.5.3" description = "Python client for Redis key-value store" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3245,6 +3416,7 @@ hiredis = ["hiredis (>=0.1.3)"] name = "regex" version = "2022.1.18" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = "*" files = [ @@ -3328,6 +3500,7 @@ files = [ name = "reporters-db" version = "3.2.36" description = "Database of Court Reporters" +category = "main" optional = false python-versions = "*" files = [ @@ -3342,6 +3515,7 @@ six = ">=1.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3363,6 +3537,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-file" version = "1.5.1" description = "File transport adapter for Requests" +category = "main" optional = false python-versions = "*" files = [ @@ -3378,6 +3553,7 @@ six = "*" name = "s3transfer" version = "0.3.6" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = "*" files = [ @@ -3392,6 +3568,7 @@ botocore = ">=1.12.36,<2.0a.0" name = "schema" version = "0.7.4" description = "Simple data validation library" +category = "main" optional = false python-versions = "*" files = [ @@ -3406,6 +3583,7 @@ contextlib2 = ">=0.5.5" name = "scipy" version = "1.10.1" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = "<3.12,>=3.8" files = [ @@ -3444,6 +3622,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "scorched" version = "0.13.1.dev0" description = "" +category = "main" optional = false python-versions = "*" files = [] @@ -3466,6 +3645,7 @@ resolved_reference = "0632024e72e22a71e17cdb778805561f7cdd33d8" name = "selenium" version = "4.9.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3483,6 +3663,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "sentry-sdk" version = "1.14.0" description = "Python client for Sentry (https://sentry.io)" +category = "main" optional = false python-versions = "*" files = [ @@ -3521,6 +3702,7 @@ tornado = ["tornado (>=5)"] name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -3545,6 +3727,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "65.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3561,6 +3744,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" files = [ @@ -3571,6 +3755,7 @@ files = [ name = "simplejson" version = "3.18.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3653,6 +3838,7 @@ files = [ name = "six" version = "1.15.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3664,6 +3850,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3675,6 +3862,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -3686,6 +3874,7 @@ files = [ name = "soupsieve" version = "2.2.1" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3697,6 +3886,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3713,6 +3903,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.5.1" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "main" optional = false python-versions = "*" files = [ @@ -3732,6 +3923,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "stripe" version = "5.2.0" description = "Python bindings for the Stripe API" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3746,6 +3938,7 @@ requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} name = "tblib" version = "1.7.0" description = "Traceback serialization library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3757,6 +3950,7 @@ files = [ name = "texttable" version = "1.6.4" description = "module for creating simple ASCII tables" +category = "main" optional = false python-versions = "*" files = [ @@ -3768,6 +3962,7 @@ files = [ name = "time-machine" version = "2.9.0" description = "Travel through time in your tests." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3833,6 +4028,7 @@ python-dateutil = "*" name = "timeout-decorator" version = "0.5.0" description = "Timeout decorator" +category = "main" optional = false python-versions = "*" files = [ @@ -3843,6 +4039,7 @@ files = [ name = "tldextract" version = "3.4.0" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3860,6 +4057,7 @@ requests-file = ">=1.4" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4069,7 @@ files = [ name = "tomlkit" version = "0.11.8" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3882,6 +4081,7 @@ files = [ name = "tqdm" version = "4.59.0" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3898,6 +4098,7 @@ telegram = ["requests"] name = "traitlets" version = "5.4.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3912,6 +4113,7 @@ test = ["pre-commit", "pytest"] name = "trio" version = "0.21.0" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3932,6 +4134,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.9.2" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3948,6 +4151,7 @@ wsproto = ">=0.14" name = "twisted" version = "22.4.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.6.7" files = [ @@ -3988,6 +4192,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." +category = "main" optional = false python-versions = "*" files = [ @@ -4013,6 +4218,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4029,6 +4235,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "types-dateparser" version = "1.1.4.6" description = "Typing stubs for dateparser" +category = "main" optional = false python-versions = "*" files = [ @@ -4040,6 +4247,7 @@ files = [ name = "types-pyopenssl" version = "23.0.0.4" description = "Typing stubs for pyOpenSSL" +category = "dev" optional = false python-versions = "*" files = [ @@ -4054,6 +4262,7 @@ cryptography = ">=35.0.0" name = "types-python-dateutil" version = "2.8.19.12" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4065,6 +4274,7 @@ files = [ name = "types-pytz" version = "2021.3.5" description = "Typing stubs for pytz" +category = "dev" optional = false python-versions = "*" files = [ @@ -4076,6 +4286,7 @@ files = [ name = "types-pyyaml" version = "6.0.4" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -4087,6 +4298,7 @@ files = [ name = "types-redis" version = "4.5.4.1" description = "Typing stubs for redis" +category = "dev" optional = false python-versions = "*" files = [ @@ -4102,6 +4314,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.29.0.0" description = "Typing stubs for requests" +category = "dev" optional = false python-versions = "*" files = [ @@ -4116,6 +4329,7 @@ types-urllib3 = "<1.27" name = "types-simplejson" version = "3.19.0.0" description = "Typing stubs for simplejson" +category = "dev" optional = false 
python-versions = "*" files = [ @@ -4127,6 +4341,7 @@ files = [ name = "types-urllib3" version = "1.26.11" description = "Typing stubs for urllib3" +category = "dev" optional = false python-versions = "*" files = [ @@ -4138,6 +4353,7 @@ files = [ name = "typing-extensions" version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4149,6 +4365,7 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -4160,6 +4377,7 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4179,6 +4397,7 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "unidecode" version = "1.2.0" description = "ASCII transliterations of Unicode text" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4190,6 +4409,7 @@ files = [ name = "uritemplate" version = "3.0.1" description = "URI templates" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4201,6 +4421,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4220,6 +4441,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "usaddress" version = "0.5.10" description = "Parse US addresses using conditional random fields" +category = "main" optional = false python-versions = "*" files = [ @@ -4236,6 +4458,7 @@ python-crfsuite = ">=0.7" name = "uvicorn" version = "0.22.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4250,7 +4473,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -4261,6 +4484,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.17.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4305,6 +4529,7 @@ test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "my name = "vine" version = "5.0.0" description = "Promises, promises, promises." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4316,6 +4541,7 @@ files = [ name = "virtualenv" version = "20.17.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4336,6 +4562,7 @@ testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7 name = "watchfiles" version = "0.19.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4370,6 +4597,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.5" description = "Measures the displayed width of unicode strings in a terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -4381,6 +4609,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "main" optional = false python-versions = "*" files = [ @@ -4392,6 +4621,7 @@ files = [ name = "websockets" version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4471,6 +4701,7 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -4555,6 +4786,7 @@ files = [ name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4569,6 +4801,7 @@ h11 = ">=0.9.0,<1" name = "zope-interface" version = "6.0" description = "Interfaces for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ From e067b3bcd89b42a0409239de3966e667acda6b33 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:08:40 +0000 Subject: [PATCH 004/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e531956054..e9052f9eaf 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This repository is organized in the following way: - scripts: logrotate, systemd, etc, and init scripts for our various configurations and daemons. -## Getting Involved +## Getting Involved If you want to get involved send us an email with your contact info or take a look through the [issues list][issues]. There are innumerable things we need help with, but we especially are looking for help with: From 661a8786def6bb2ce751dfc236906e6954b7bba9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:09:25 +0000 Subject: [PATCH 005/372] build(deps): bump django-ses from 3.3.0 to 3.5.0 Bumps [django-ses](https://github.com/django-ses/django-ses) from 3.3.0 to 3.5.0. - [Release notes](https://github.com/django-ses/django-ses/releases) - [Changelog](https://github.com/django-ses/django-ses/blob/master/CHANGES.md) - [Commits](https://github.com/django-ses/django-ses/compare/v3.3.0...v3.5.0) --- updated-dependencies: - dependency-name: django-ses dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 251 +++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- 2 files changed, 243 insertions(+), 10 deletions(-) diff --git a/poetry.lock b/poetry.lock index d923189ec5..c316342e1d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "amqp" version = "5.1.1" description = "Low-level AMQP client for Python (fork of amqplib)." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -18,6 +19,7 @@ vine = ">=5.0.0" name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -38,6 +40,7 @@ trio = ["trio (>=0.16,<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "main" optional = false python-versions = "*" files = [ @@ -49,6 +52,7 @@ files = [ name = "argparse" version = "1.4.0" description = "Python command-line parsing library" +category = "main" optional = false python-versions = "*" files = [ @@ -60,6 +64,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -74,6 +79,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -85,6 +91,7 @@ files = [ name = "astroid" version = "2.15.4" description = "An abstract syntax tree for Python with inference support." +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -104,6 +111,7 @@ wrapt = [ name = "asttokens" version = "2.0.8" description = "Annotate AST trees with source code positions" +category = "main" optional = false python-versions = "*" files = [ @@ -121,6 +129,7 @@ test = ["astroid (<=2.5.3)", "pytest"] name = "async-generator" version = "1.10" description = "Async generators and context managers for Python 3.5+" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -132,6 +141,7 @@ files = [ name = "attrs" version = "20.3.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -149,6 +159,7 @@ tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> name = "autobahn" version = "23.1.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -177,6 +188,7 @@ xbr = ["base58 (>=2.1.0)", "cbor2 (>=5.2.0)", "click (>=8.1.2)", "ecdsa (>=0.16. name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." 
+category = "main" optional = false python-versions = "*" files = [ @@ -195,6 +207,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "main" optional = false python-versions = "*" files = [ @@ -206,6 +219,7 @@ files = [ name = "beautifulsoup4" version = "4.11.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -224,6 +238,7 @@ lxml = ["lxml"] name = "billiard" version = "4.1.0" description = "Python multiprocessing fork with improvements and bugfixes" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -235,6 +250,7 @@ files = [ name = "black" version = "23.3.0" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -283,6 +299,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.17.43" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -299,6 +316,7 @@ s3transfer = ">=0.3.0,<0.4.0" name = "botocore" version = "1.20.43" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -318,6 +336,7 @@ crt = ["awscrt (==0.10.8)"] name = "celery" version = "5.3.0" description = "Distributed Task Queue." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -373,6 +392,7 @@ zstd = ["zstandard (==0.21.0)"] name = "certifi" version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -384,6 +404,7 @@ files = [ name = "cffi" version = "1.14.5" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -445,6 +466,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -456,6 +478,7 @@ files = [ name = "chardet" version = "5.1.0" description = "Universal encoding detector for Python 3" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -467,6 +490,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -551,6 +575,7 @@ files = [ name = "click" version = "8.1.2" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +590,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.0" description = "Enables git-like *did-you-mean* feature in click" +category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -579,6 +605,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." 
+category = "main" optional = false python-versions = "*" files = [ @@ -596,6 +623,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.2.0" description = "REPL plugin for Click" +category = "main" optional = false python-versions = "*" files = [ @@ -612,6 +640,7 @@ six = "*" name = "climage" version = "0.1.3" description = "Convert images to beautiful ANSI escape codes" +category = "main" optional = false python-versions = ">=3.2" files = [ @@ -627,6 +656,7 @@ Pillow = "*" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -638,6 +668,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -649,6 +680,7 @@ files = [ name = "contextlib2" version = "0.6.0.post1" description = "Backports and enhancements for the contextlib module" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -660,6 +692,7 @@ files = [ name = "coreapi" version = "2.3.3" description = "Python client library for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -677,6 +710,7 @@ uritemplate = "*" name = "coreschema" version = "0.0.4" description = "Core Schema." +category = "main" optional = false python-versions = "*" files = [ @@ -691,6 +725,7 @@ jinja2 = "*" name = "courts-db" version = "0.10.9" description = "Database of Courts" +category = "main" optional = false python-versions = "*" files = [ @@ -702,6 +737,7 @@ files = [ name = "cryptography" version = "36.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -742,6 +778,7 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "cssselect" version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -753,6 +790,7 @@ files = [ name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +810,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "datasketch" version = "1.5.7" description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" optional = false python-versions = "*" files = [ @@ -794,6 +833,7 @@ test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy" name = "dateparser" version = "1.1.8" description = "Date parsing library designed to parse dates from HTML pages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -816,6 +856,7 @@ langdetect = ["langdetect"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -827,6 +868,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -838,6 +880,7 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -852,6 +895,7 @@ graph = ["objgraph (>=1.7.2)"] name = "disposable-email-domains" version = "0.0.64" description = "A set of disposable email domains" +category = "main" optional = false python-versions = "*" files = [ @@ -866,6 +910,7 @@ dev = ["check-manifest"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -877,6 +922,7 @@ files = [ name = "django" version = "4.2.1" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -897,6 +943,7 @@ bcrypt = ["bcrypt"] name = "django-admin-cursor-paginator" version = "0.1.2" description = "Drop-in replacement for django admin default pagination that works fast with huge tables." +category = "main" optional = false python-versions = ">=3.4" files = [ @@ -911,6 +958,7 @@ Django = ">=2.0" name = "django-cache-memoize" version = "0.1.8" description = "Django utility for a memoization decorator that uses the Django cache framework." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -925,6 +973,7 @@ dev = ["black", "flake8", "therapist", "tox", "twine"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -939,6 +988,7 @@ Django = ">=3.2" name = "django-debug-toolbar" version = "4.0.0" description = "A configurable set of panels that display various debug information about the current request/response." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -954,6 +1004,7 @@ sqlparse = ">=0.2" name = "django-elasticsearch-dsl" version = "7.3" description = "Wrapper around elasticsearch-dsl-py for django models" +category = "main" optional = false python-versions = "*" files = [ @@ -969,6 +1020,7 @@ six = "*" name = "django-environ" version = "0.8.1" description = "A package that allows you to utilize 12factor inspired environment variables to configure your Django application." +category = "main" optional = false python-versions = ">=3.4,<4" files = [ @@ -977,14 +1029,15 @@ files = [ ] [package.extras] -develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.dev0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] -docs = ["furo (>=2021.8.17b43,<2021.9.dev0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +docs = ["furo (>=2021.8.17b43,<2021.9.0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] testing = ["coverage[toml] (>=5.0a4)", "pytest (>=4.6.11)"] [[package]] name = "django-extensions" version = "3.2.1" description = "Extensions for Django" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,6 +1052,7 @@ Django = ">=3.2" name = "django-filter" version = "2.4.0" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1013,6 +1067,7 @@ Django = ">=2.2" name = "django-hcaptcha" version = "0.2.0" description = "Django hCaptcha provides a simple way to protect your django forms using hCaptcha" +category = "main" optional = false python-versions = "*" files = [ @@ -1024,6 +1079,7 @@ files = [ name = "django-localflavor" version = "3.1" description = "Country-specific Django helpers" +category = "main" optional = false python-versions = "*" files = [ @@ -1039,6 +1095,7 @@ python-stdnum = ">=1.6" name = "django-markdown-deux" version = "1.0.6" description = "a Django app that provides template tags for using Markdown (using the python-markdown2 processor)" +category = "main" optional = false python-versions = "*" files = [ @@ -1052,6 +1109,7 @@ markdown2 = "*" name = "django-mathfilters" version = "1.0.0" description = "A set of simple math filters for Django" +category = "main" optional = false python-versions = "*" files = [ @@ -1063,6 +1121,7 @@ files = [ name = "django-override-storage" version = "0.3.2" description = "Django test helpers to manage file storage side effects." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1074,6 +1133,7 @@ files = [ name = "django-pghistory" version = "2.7.0" description = "History tracking for Django and Postgres" +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1089,6 +1149,7 @@ django-pgtrigger = ">=4.5.0" name = "django-pgtrigger" version = "4.6.0" description = "Postgres trigger support integrated with Django models." +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1103,6 +1164,7 @@ django = ">=2" name = "django-ratelimit" version = "4.0.0" description = "Cache-based rate-limiting for Django." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1112,13 +1174,14 @@ files = [ [[package]] name = "django-ses" -version = "3.3.0" +version = "3.5.0" description = "A Django email backend for Amazon's Simple Email Service" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "django_ses-3.3.0-py3-none-any.whl", hash = "sha256:029edd3f23333f09cd14b4ebc274a5c3700819f16f1fcd5a25b5639b3be0fdba"}, - {file = "django_ses-3.3.0.tar.gz", hash = "sha256:bdfc5bd4bf1f95d01756761bc9dae40b79c4c709e49ff1b9bd9e853dfd09efb1"}, + {file = "django_ses-3.5.0-py3-none-any.whl", hash = "sha256:3522fe531155eb06bb015b3b36324c059194450633b33f9bd5bc9d1328822fe2"}, + {file = "django_ses-3.5.0.tar.gz", hash = "sha256:dc1644f50608fbf3a64f085a371c61d56d68eba3c5efa69651f13dc3ba05049d"}, ] [package.dependencies] @@ -1136,6 +1199,7 @@ events = ["cryptography (>=36.0.2)", "requests (>=2.27.1)"] name = "django-storages" version = "1.13.2" description = "Support for many storage backends in Django" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1158,6 +1222,7 @@ sftp = ["paramiko (>=1.10.0)"] name = "django-stubs" version = "4.2.0" description = "Mypy stubs for Django" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1181,6 +1246,7 @@ compatible-mypy = ["mypy (>=1.2.0,<1.3)"] name = "django-stubs-ext" version = "4.2.0" description = "Monkey-patching and extensions for django-stubs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1196,6 +1262,7 @@ typing-extensions = "*" name = "django-waffle" version = "3.0.0" description = "A feature flipper for Django." +category = "main" optional = false python-versions = "*" files = [ @@ -1207,6 +1274,7 @@ files = [ name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1222,6 +1290,7 @@ pytz = "*" name = "djangorestframework-filters" version = "1.0.0.dev2" description = "Better filtering for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1237,6 +1306,7 @@ djangorestframework = "*" name = "djangorestframework-stubs" version = "3.14.0" description = "PEP-484 stubs for django-rest-framework" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1261,6 +1331,7 @@ markdown = ["types-Markdown (>=0.1.5)"] name = "djangorestframework-xml" version = "2.0.0" description = "XML support for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1280,6 +1351,7 @@ tests = ["Django (>=1.6)", "djangorestframework (>=2.4.3)", "flake8", "pytest", name = "docopt" version = "0.6.2" description = "Pythonic argument parser, that will make you smile" +category = "main" optional = false python-versions = "*" files = [ @@ -1290,6 +1362,7 @@ files = [ name = "drf-dynamic-fields" version = "0.3.1" description = "Dynamically return subset of Django REST Framework serializer fields" +category = "main" optional = false python-versions = "*" files = [ @@ -1301,6 +1374,7 @@ files = [ name = "elasticsearch" version = "7.17.9" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1322,6 +1396,7 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "elasticsearch-dsl" version = "7.4.0" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1341,6 +1416,7 @@ develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytes name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1355,6 +1431,7 @@ test = ["pytest (>=6)"] name = "executing" version = "1.1.0" description = "Get the currently executing AST node of a frame, and other information" +category = "main" optional = false python-versions = "*" files = [ @@ -1369,6 +1446,7 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "exrex" version = "0.11.0" description = "Irregular methods for regular expressions" +category = "dev" optional = false python-versions = "*" files = [ @@ -1380,6 +1458,7 @@ files = [ name = "eyecite" version = "2.4.0" description = "Tool for extracting legal citations from text strings." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1399,6 +1478,7 @@ reporters-db = ">=3.2.2,<4.0.0" name = "factory-boy" version = "3.2.1" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1417,6 +1497,7 @@ doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] name = "faker" version = "13.3.1" description = "Faker is a Python package that generates fake data for you." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1431,6 +1512,7 @@ python-dateutil = ">=2.4" name = "fast-diff-match-patch" version = "2.0.1" description = "fast_diff_match_patch: Python package wrapping the C++ implementation of google-diff-match-patch" +category = "main" optional = false python-versions = "*" files = [ @@ -1506,6 +1588,7 @@ files = [ name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1520,6 +1603,7 @@ sgmllib3k = "*" name = "filelock" version = "3.9.0" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1619,7 @@ testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pyt name = "flake8" version = "6.0.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -1551,6 +1636,7 @@ pyflakes = ">=3.0.0,<3.1.0" name = "flynt" version = "0.78" description = "CLI tool to convert a python project's %-formatted strings to f-strings." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1569,6 +1655,7 @@ dev = ["build", "pre-commit", "pytest", "pytest-cov", "twine"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1579,6 +1666,7 @@ files = [ name = "fuzzywuzzy" version = "0.18.0" description = "Fuzzy string matching in python" +category = "main" optional = false python-versions = "*" files = [ @@ -1593,6 +1681,7 @@ speedup = ["python-levenshtein (>=0.12)"] name = "geonamescache" version = "1.6.0" description = "Geonames data for continents, cities and US states." +category = "main" optional = false python-versions = "*" files = [ @@ -1604,6 +1693,7 @@ files = [ name = "gunicorn" version = "20.1.0" description = "WSGI HTTP Server for UNIX" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1624,6 +1714,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.13.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1635,6 +1726,7 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1656,6 +1748,7 @@ lxml = ["lxml"] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1670,6 +1763,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httptools" version = "0.5.0" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -1723,6 +1817,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." 
+category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1737,6 +1832,7 @@ idna = ">=2.5" name = "identify" version = "2.5.17" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1751,6 +1847,7 @@ license = ["ukkonen"] name = "idna" version = "2.10" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1762,6 +1859,7 @@ files = [ name = "igraph" version = "0.10.4" description = "High performance graph data structures and algorithms" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1828,6 +1926,7 @@ test-musl = ["networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"] name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1843,6 +1942,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1854,6 +1954,7 @@ files = [ name = "internetarchive" version = "3.3.0" description = "A Python interface to archive.org." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1879,6 +1980,7 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7. name = "ipaddress" version = "1.0.23" description = "IPv4/IPv6 manipulation library" +category = "main" optional = false python-versions = "*" files = [ @@ -1890,6 +1992,7 @@ files = [ name = "ipython" version = "8.10.0" description = "IPython: Productive Interactive Computing" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1928,6 +2031,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isort" version = "5.8.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1944,6 +2048,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itypes" version = "1.2.0" description = "Simple immutable types for python." +category = "main" optional = false python-versions = "*" files = [ @@ -1955,6 +2060,7 @@ files = [ name = "jedi" version = "0.18.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1973,6 +2079,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "2.11.3" description = "A very fast and expressive template engine." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1990,6 +2097,7 @@ i18n = ["Babel (>=0.8)"] name = "jmespath" version = "0.10.0" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2001,6 +2109,7 @@ files = [ name = "jsonpatch" version = "1.32" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2015,6 +2124,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.1" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2026,6 +2136,7 @@ files = [ name = "judge-pics" version = "2.0.2" description = "Database of Judge Pictures" +category = "main" optional = false python-versions = "*" files = [ @@ -2045,6 +2156,7 @@ requests = ">=2.0,<3.0" name = "juriscraper" version = "2.5.49" description = "An API to scrape American court websites for metadata." +category = "main" optional = false python-versions = "*" files = [ @@ -2072,6 +2184,7 @@ tldextract = "*" name = "kdtree" version = "0.16" description = "A Python implemntation of a kd-tree" +category = "main" optional = false python-versions = "*" files = [ @@ -2083,6 +2196,7 @@ files = [ name = "kombu" version = "5.3.0" description = "Messaging library for Python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2229,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "lazy-object-proxy" version = "1.6.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2146,6 +2261,7 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2231,6 +2347,7 @@ source = ["Cython (>=0.29.7)"] name = "lxml-stubs" version = "0.4.0" description = "Type annotations for the lxml package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2245,6 +2362,7 @@ test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1. name = "markdown2" version = "2.4.0" description = "A fast and complete Python implementation of Markdown" +category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -2256,6 +2374,7 @@ files = [ name = "markupsafe" version = "1.1.1" description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" files = [ @@ -2317,6 +2436,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2331,6 +2451,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2342,6 +2463,7 @@ files = [ name = "mypy" version = "1.2.0" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2388,6 +2510,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2399,6 +2522,7 @@ files = [ name = "nameparser" version = "1.1.1" description = "A simple Python module for parsing human names into their individual components." +category = "main" optional = false python-versions = "*" files = [ @@ -2410,6 +2534,7 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2425,8 +2550,9 @@ icu = ["PyICU (>=1.0.0)"] name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" +category = "main" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -2441,6 +2567,7 @@ PyOpenSSL = "*" name = "networkx" version = "3.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2459,6 +2586,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2473,6 +2601,7 @@ setuptools = "*" name = "nose" version = "1.3.7" description = "nose extends unittest to make testing easier" +category = "main" optional = false python-versions = "*" files = [ @@ -2485,6 +2614,7 @@ files = [ name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2522,6 +2652,7 @@ files = [ name = "openapi-codec" version = "1.3.2" description = "An OpenAPI codec for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -2535,6 +2666,7 @@ coreapi = ">=2.2.0" name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2549,6 +2681,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2560,6 +2693,7 @@ files = [ name = "pandas" version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2604,6 +2738,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2619,6 +2754,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2630,6 +2766,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." +category = "main" optional = false python-versions = "*" files = [ @@ -2644,6 +2781,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "main" optional = false python-versions = "*" files = [ @@ -2655,6 +2793,7 @@ files = [ name = "pillow" version = "9.3.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2729,6 +2868,7 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "2.5.1" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2744,6 +2884,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "0.13.1" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2758,6 +2899,7 @@ dev = ["pre-commit", "tox"] name = "pre-commit" version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2776,6 +2918,7 @@ virtualenv = ">=20.10.0" name = "probableparsing" version = "0.0.1" description = "Common methods for propbable parsers" +category = "main" optional = false python-versions = "*" files = [ @@ -2787,6 +2930,7 @@ files = [ name = "prompt-toolkit" version = "3.0.31" description = "Library for building powerful interactive command lines in Python" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -2801,6 +2945,7 @@ wcwidth = "*" name = "psycopg2" version = "2.9.5" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2823,6 +2968,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -2834,6 +2980,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "main" optional = false python-versions = "*" files = [ @@ -2848,6 +2995,7 @@ tests = ["pytest"] name = "pyahocorasick" version = "1.4.2" description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." +category = "main" optional = false python-versions = "*" files = [ @@ -2858,6 +3006,7 @@ files = [ name = "pyasn1" version = "0.4.8" description = "ASN.1 types and codecs" +category = "main" optional = false python-versions = "*" files = [ @@ -2869,6 +3018,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2883,6 +3033,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.10.0" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2894,6 +3045,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2905,6 +3057,7 @@ files = [ name = "pyflakes" version = "3.0.1" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2916,6 +3069,7 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2930,6 +3084,7 @@ plugins = ["importlib-metadata"] name = "pylint" version = "2.17.3" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -2958,6 +3113,7 @@ testutils = ["gitpython (>3)"] name = "pyopenssl" version = "20.0.1" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -2977,6 +3133,7 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "2.4.7" description = "Python parsing module" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2988,6 +3145,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3000,6 +3158,7 @@ files = [ name = "pystemmer" version = "2.0.1" description = "Snowball stemming algorithms, for information retrieval" +category = "main" optional = false python-versions = "*" files = [ @@ -3010,6 +3169,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3032,6 +3192,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3050,6 +3211,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "python-crfsuite" version = "0.9.9" description = "Python binding for CRFsuite" +category = "main" optional = false python-versions = "*" files = [ @@ -3100,6 +3262,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3114,6 +3277,7 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3128,6 +3292,7 @@ cli = ["click (>=5.0)"] name = "python-levenshtein" version = "0.12.2" description = "Python extension for computing string edit distances and similarities." 
+category = "main" optional = false python-versions = "*" files = [ @@ -3141,6 +3306,7 @@ setuptools = "*" name = "python-magic" version = "0.4.22" description = "File type identification using libmagic" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3152,6 +3318,7 @@ files = [ name = "python-stdnum" version = "1.16" description = "Python module to handle standardized numbers and codes" +category = "main" optional = false python-versions = "*" files = [ @@ -3168,6 +3335,7 @@ soap-fallback = ["PySimpleSOAP"] name = "pytz" version = "2021.1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -3179,6 +3347,7 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3193,6 +3362,7 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "5.4.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3231,6 +3401,7 @@ files = [ name = "redis" version = "3.5.3" description = "Python client for Redis key-value store" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3245,6 +3416,7 @@ hiredis = ["hiredis (>=0.1.3)"] name = "regex" version = "2022.1.18" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = "*" files = [ @@ -3328,6 +3500,7 @@ files = [ name = "reporters-db" version = "3.2.36" description = "Database of Court Reporters" +category = "main" optional = false python-versions = "*" files = [ @@ -3342,6 +3515,7 @@ six = ">=1.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3363,6 +3537,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-file" version = "1.5.1" description = "File transport adapter for Requests" +category = "main" optional = false python-versions = "*" files = [ @@ -3378,6 +3553,7 @@ six = "*" name = "s3transfer" version = "0.3.6" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = "*" files = [ @@ -3392,6 +3568,7 @@ botocore = ">=1.12.36,<2.0a.0" name = "schema" version = "0.7.4" description = "Simple data validation library" +category = "main" optional = false python-versions = "*" files = [ @@ -3406,6 +3583,7 @@ contextlib2 = ">=0.5.5" name = "scipy" version = "1.10.1" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = "<3.12,>=3.8" files = [ @@ -3444,6 +3622,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "scorched" version = "0.13.1.dev0" description = "" +category = "main" optional = false python-versions = "*" files = [] @@ -3466,6 +3645,7 @@ resolved_reference = "0632024e72e22a71e17cdb778805561f7cdd33d8" name = "selenium" version = "4.9.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3483,6 +3663,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "sentry-sdk" version = "1.14.0" description = "Python client for Sentry (https://sentry.io)" +category = "main" optional = false python-versions = "*" files = [ @@ -3521,6 +3702,7 @@ tornado = ["tornado (>=5)"] name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -3545,6 +3727,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "65.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3561,6 +3744,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" files = [ @@ -3571,6 +3755,7 @@ files = [ name = "simplejson" version = "3.18.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3653,6 +3838,7 @@ files = [ name = "six" version = "1.15.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3664,6 +3850,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3675,6 +3862,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -3686,6 +3874,7 @@ files = [ name = "soupsieve" version = "2.2.1" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3697,6 +3886,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3713,6 +3903,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.5.1" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "main" optional = false python-versions = "*" files = [ @@ -3732,6 +3923,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "stripe" version = "5.2.0" description = "Python bindings for the Stripe API" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3746,6 +3938,7 @@ requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} name = "tblib" version = "1.7.0" description = "Traceback serialization library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3757,6 +3950,7 @@ files = [ name = "texttable" version = "1.6.4" description = "module for creating simple ASCII tables" +category = "main" optional = false python-versions = "*" files = [ @@ -3768,6 +3962,7 @@ files = [ name = "time-machine" version = "2.9.0" description = "Travel through time in your tests." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3833,6 +4028,7 @@ python-dateutil = "*" name = "timeout-decorator" version = "0.5.0" description = "Timeout decorator" +category = "main" optional = false python-versions = "*" files = [ @@ -3843,6 +4039,7 @@ files = [ name = "tldextract" version = "3.4.0" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3860,6 +4057,7 @@ requests-file = ">=1.4" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4069,7 @@ files = [ name = "tomlkit" version = "0.11.8" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3882,6 +4081,7 @@ files = [ name = "tqdm" version = "4.59.0" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3898,6 +4098,7 @@ telegram = ["requests"] name = "traitlets" version = "5.4.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3912,6 +4113,7 @@ test = ["pre-commit", "pytest"] name = "trio" version = "0.21.0" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3932,6 +4134,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.9.2" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3948,6 +4151,7 @@ wsproto = ">=0.14" name = "twisted" version = "22.4.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.6.7" files = [ @@ -3988,6 +4192,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." +category = "main" optional = false python-versions = "*" files = [ @@ -4013,6 +4218,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4029,6 +4235,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "types-dateparser" version = "1.1.4.6" description = "Typing stubs for dateparser" +category = "main" optional = false python-versions = "*" files = [ @@ -4040,6 +4247,7 @@ files = [ name = "types-pyopenssl" version = "23.0.0.4" description = "Typing stubs for pyOpenSSL" +category = "dev" optional = false python-versions = "*" files = [ @@ -4054,6 +4262,7 @@ cryptography = ">=35.0.0" name = "types-python-dateutil" version = "2.8.19.12" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4065,6 +4274,7 @@ files = [ name = "types-pytz" version = "2021.3.5" description = "Typing stubs for pytz" +category = "dev" optional = false python-versions = "*" files = [ @@ -4076,6 +4286,7 @@ files = [ name = "types-pyyaml" version = "6.0.4" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -4087,6 +4298,7 @@ files = [ name = "types-redis" version = "4.5.4.1" description = "Typing stubs for redis" +category = "dev" optional = false python-versions = "*" files = [ @@ -4102,6 +4314,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.29.0.0" description = "Typing stubs for requests" +category = "dev" optional = false python-versions = "*" files = [ @@ -4116,6 +4329,7 @@ types-urllib3 = "<1.27" name = "types-simplejson" version = "3.19.0.0" description = "Typing stubs for simplejson" +category = "dev" optional = false 
python-versions = "*" files = [ @@ -4127,6 +4341,7 @@ files = [ name = "types-urllib3" version = "1.26.11" description = "Typing stubs for urllib3" +category = "dev" optional = false python-versions = "*" files = [ @@ -4138,6 +4353,7 @@ files = [ name = "typing-extensions" version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4149,6 +4365,7 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -4160,6 +4377,7 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4179,6 +4397,7 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "unidecode" version = "1.2.0" description = "ASCII transliterations of Unicode text" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4190,6 +4409,7 @@ files = [ name = "uritemplate" version = "3.0.1" description = "URI templates" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4201,6 +4421,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4220,6 +4441,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "usaddress" version = "0.5.10" description = "Parse US addresses using conditional random fields" +category = "main" optional = false python-versions = "*" files = [ @@ -4236,6 +4458,7 @@ python-crfsuite = ">=0.7" name = "uvicorn" version = "0.22.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4250,7 +4473,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -4261,6 +4484,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.17.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4305,6 +4529,7 @@ test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "my name = "vine" version = "5.0.0" description = "Promises, promises, promises." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4316,6 +4541,7 @@ files = [ name = "virtualenv" version = "20.17.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4336,6 +4562,7 @@ testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7 name = "watchfiles" version = "0.19.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4370,6 +4597,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.5" description = "Measures the displayed width of unicode strings in a terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -4381,6 +4609,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "main" optional = false python-versions = "*" files = [ @@ -4392,6 +4621,7 @@ files = [ name = "websockets" version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4471,6 +4701,7 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -4555,6 +4786,7 @@ files = [ name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4569,6 +4801,7 @@ h11 = ">=0.9.0,<1" name = "zope-interface" version = "6.0" description = "Interfaces for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4615,4 +4848,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "d435d4c7dce4af1c659d4dbe2d712e2091a1514a80439b4e4009404f69bef595" +content-hash = "d4fbe083bb0ec3fbf23ad5fa55c0819f250d024c3022878c6a8c471d094bf231" diff --git a/pyproject.toml b/pyproject.toml index 5ec87e6e65..0fe25deecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,7 +95,7 @@ PyStemmer = "^2.0.1" factory-boy = "^3.2.1" django-elasticsearch-dsl = "^7.3" django-override-storage = "^0.3.2" -django-ses = {extras = ["events"], version = "^3.3.0"} +django-ses = {extras = ["events"], version = "^3.5.0"} django-environ = "^0.8.1" judge-pics = "^2.0.1" django-admin-cursor-paginator = "^0.1.2" From 23221dfc0e548d8da18c75508c98a6e5b2f2078e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:10:18 +0000 Subject: [PATCH 006/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e531956054..e9052f9eaf 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This repository is organized in the following way: - scripts: logrotate, systemd, etc, and init scripts for our various configurations and daemons. -## Getting Involved +## Getting Involved If you want to get involved send us an email with your contact info or take a look through the [issues list][issues]. 
There are innumerable things we need help with, but we especially are looking for help with: From b701bc05516840bf9eb1793f304674c4085595e5 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 14:05:55 -0400 Subject: [PATCH 007/372] feat(search.models): Add django-ordered-model Add django-ordered-model Add django-ordered-model to Opinions Update poetry --- cl/search/migrations/0019_order_opinions.py | 71 ++++++++++ cl/search/migrations/0019_order_opinions.sql | 129 +++++++++++++++++++ cl/search/models.py | 4 +- cl/settings/django.py | 1 + poetry.lock | 13 +- pyproject.toml | 1 + 6 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 cl/search/migrations/0019_order_opinions.py create mode 100644 cl/search/migrations/0019_order_opinions.sql diff --git a/cl/search/migrations/0019_order_opinions.py b/cl/search/migrations/0019_order_opinions.py new file mode 100644 index 0000000000..5e446056cc --- /dev/null +++ b/cl/search/migrations/0019_order_opinions.py @@ -0,0 +1,71 @@ +# Generated by Django 4.2.1 on 2023-06-15 17:56 + +from django.db import migrations, models +import pgtrigger.compiler +import pgtrigger.migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0018_update_cluster_model"), + ] + + operations = [ + migrations.AlterModelOptions( + name="opinion", + options={"ordering": ("order",)}, + ), + pgtrigger.migrations.RemoveTrigger( + model_name="opinion", + name="update_or_delete_snapshot_delete", + ), + pgtrigger.migrations.RemoveTrigger( + model_name="opinion", + name="update_or_delete_snapshot_update", + ), + migrations.AddField( + model_name="opinion", + name="order", + field=models.PositiveIntegerField( + db_index=True, default=1, editable=False, verbose_name="order" + ), + preserve_default=False, + ), + migrations.AddField( + model_name="opinionevent", + name="order", + field=models.PositiveIntegerField( + default=1, editable=False, verbose_name="order" + ), + preserve_default=False, + ), + pgtrigger.migrations.AddTrigger( + model_name="opinion", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_update", + sql=pgtrigger.compiler.UpsertTriggerSql( + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr"))', + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", 
"html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="bcac41027f469bbd394e8671cb0b2fa33e7035f3", + operation="UPDATE", + pgid="pgtrigger_update_or_delete_snapshot_update_67ecd", + table="search_opinion", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="opinion", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_delete", + sql=pgtrigger.compiler.UpsertTriggerSql( + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad", + operation="DELETE", + pgid="pgtrigger_update_or_delete_snapshot_delete_1f4fd", + table="search_opinion", + when="AFTER", + ), + ), + ), + ] diff --git a/cl/search/migrations/0019_order_opinions.sql b/cl/search/migrations/0019_order_opinions.sql new file mode 100644 index 0000000000..3226cb510b --- /dev/null +++ b/cl/search/migrations/0019_order_opinions.sql @@ -0,0 +1,129 @@ +BEGIN; +-- +-- Change Meta options on opinion +-- +-- (no-op) +-- +-- Remove trigger update_or_delete_snapshot_delete from model opinion +-- +DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; +-- +-- Remove trigger update_or_delete_snapshot_update from model opinion +-- +DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; +-- +-- Add field order to opinion +-- +ALTER TABLE "search_opinion" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); +ALTER TABLE "search_opinion" ALTER COLUMN "order" DROP DEFAULT; +-- +-- Add field order to opinionevent +-- +ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); +ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; +-- +-- Create trigger update_or_delete_snapshot_update on model opinion +-- + + CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( + trigger_name NAME + ) + RETURNS BOOLEAN AS $$ + DECLARE + _pgtrigger_ignore TEXT[]; + _result BOOLEAN; + BEGIN + BEGIN + SELECT INTO 
_pgtrigger_ignore + CURRENT_SETTING('pgtrigger.ignore'); + EXCEPTION WHEN OTHERS THEN + END; + IF _pgtrigger_ignore IS NOT NULL THEN + SELECT trigger_name = ANY(_pgtrigger_ignore) + INTO _result; + RETURN _result; + ELSE + RETURN FALSE; + END IF; + END; + $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_update_67ecd() + RETURNS TRIGGER AS $$ + + BEGIN + IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN + IF (TG_OP = 'DELETE') THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; + END IF; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + END; + $$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; + CREATE TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd + AFTER UPDATE ON "search_opinion" + + + FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr")) + EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_update_67ecd(); + + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS 'bcac41027f469bbd394e8671cb0b2fa33e7035f3'; + +-- +-- Create trigger update_or_delete_snapshot_delete on model opinion +-- + + CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( + trigger_name NAME + ) + RETURNS BOOLEAN AS $$ + DECLARE + _pgtrigger_ignore TEXT[]; + _result BOOLEAN; + BEGIN + BEGIN + SELECT INTO _pgtrigger_ignore + CURRENT_SETTING('pgtrigger.ignore'); + EXCEPTION WHEN OTHERS THEN + END; + IF _pgtrigger_ignore IS NOT NULL THEN + SELECT trigger_name = ANY(_pgtrigger_ignore) + INTO _result; + 
RETURN _result; + ELSE + RETURN FALSE; + END IF; + END; + $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_delete_1f4fd() + RETURNS TRIGGER AS $$ + + BEGIN + IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN + IF (TG_OP = 'DELETE') THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; + END IF; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + END; + $$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; + CREATE TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd + AFTER DELETE ON "search_opinion" + + + FOR EACH ROW + EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_delete_1f4fd(); + + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad'; + +CREATE INDEX "search_opinion_order_d54dd126" ON "search_opinion" ("order"); +COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index d04587edef..fc6aa75414 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,6 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations +from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,7 +2816,7 @@ def sort_cites(c): @pghistory.track(AfterUpdateOrDeleteSnapshot()) -class Opinion(AbstractDateTimeModel): +class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" UNANIMOUS = "015unamimous" LEAD = "020lead" @@ -2965,6 +2966,7 @@ class Opinion(AbstractDateTimeModel): default=False, db_index=True, ) + order_with_respect_to = "cluster" @property def siblings(self) -> QuerySet: diff --git a/cl/settings/django.py b/cl/settings/django.py index 21b1ba4a7c..a522d824df 100644 --- a/cl/settings/django.py +++ b/cl/settings/django.py @@ -162,6 +162,7 @@ "admin_cursor_paginator", "pghistory", "pgtrigger", + "ordered_model", # CourtListener Apps "cl.alerts", "cl.audio", diff --git a/poetry.lock b/poetry.lock index 7f91780e6a..b8f82b7eed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1054,6 +1054,17 @@ files = [ {file = "django_mathfilters-1.0.0-py3-none-any.whl", hash = "sha256:64200a21bb249fbf27be601d4bbb788779e09c6e063170c097cd82c4d18ebb83"}, ] +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -4576,4 +4587,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "674af32861e1e5bf9c31401f02a3af0b698be8b60b9492cd89ab5464218efd3e" +content-hash = "2b4d76ce134a241162a25c9634a4f9fdbf140d261750fdfca63a87ccbac4fcfd" diff --git a/pyproject.toml b/pyproject.toml index 91020cf1e0..ee00cd8366 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ types-dateparser = "^1.1.4.6" juriscraper = "^2.5.49" uvicorn = {extras = ["standard"], version = "^0.22.0"} daphne = "^4.0.0" +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 6cf0d7581be8241eda3d0b8b4a46833efb7de979 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 14:57:38 -0400 Subject: [PATCH 008/372] fix(tests): Update fixtures for opinion model --- cl/search/fixtures/functest_opinions.json | 12 +++-- cl/search/fixtures/opinions-issue-412.json | 6 ++- cl/search/fixtures/opinions-issue-550.json | 6 ++- cl/search/fixtures/test_objects_search.json | 18 ++++--- .../fixtures/api_scotus_map_data.json | 6 ++- .../fixtures/scotus_map_data.json | 51 ++++++++++++------- 6 files changed, 66 insertions(+), 33 deletions(-) diff --git a/cl/search/fixtures/functest_opinions.json b/cl/search/fixtures/functest_opinions.json index e4fa89a260..45f5f0b759 100644 --- a/cl/search/fixtures/functest_opinions.json +++ b/cl/search/fixtures/functest_opinions.json @@ -64,7 +64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -134,7 +135,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 11 @@ -184,7 +186,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 12 @@ -254,7 +257,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 12 diff --git a/cl/search/fixtures/opinions-issue-412.json b/cl/search/fixtures/opinions-issue-412.json index ca6ac33971..2e429ebecf 100644 --- a/cl/search/fixtures/opinions-issue-412.json +++ b/cl/search/fixtures/opinions-issue-412.json @@ -64,7 +64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -134,7 +135,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/opinions-issue-550.json b/cl/search/fixtures/opinions-issue-550.json index b0163eb8f8..829a94c7d2 100644 --- a/cl/search/fixtures/opinions-issue-550.json +++ b/cl/search/fixtures/opinions-issue-550.json @@ -64,7 
+64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -86,7 +87,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index 2255c7edcf..9fddb84fca 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -239,7 +239,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 1 @@ -261,7 +262,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2 @@ -283,7 +285,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 3 @@ -305,7 +308,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 4 @@ -327,7 +331,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 5 @@ -349,7 +354,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 6 diff --git a/cl/visualizations/fixtures/api_scotus_map_data.json b/cl/visualizations/fixtures/api_scotus_map_data.json index 5b4b19fe73..46dc2f9856 100644 --- a/cl/visualizations/fixtures/api_scotus_map_data.json +++ b/cl/visualizations/fixtures/api_scotus_map_data.json @@ -121,7 +121,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 1 @@ -143,7 +144,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2 diff --git a/cl/visualizations/fixtures/scotus_map_data.json b/cl/visualizations/fixtures/scotus_map_data.json index ce504fe2c9..a885e4df54 100644 --- a/cl/visualizations/fixtures/scotus_map_data.json +++ b/cl/visualizations/fixtures/scotus_map_data.json @@ -902,7 +902,8 @@ "date_created": "2016-02-16T19:49:54.525Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111014 @@ -924,7 +925,8 @@ "date_created": "2016-02-16T19:49:54.545Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111113 @@ -946,7 +948,8 @@ "date_created": "2016-02-16T19:49:54.565Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111464 @@ -968,7 +971,8 @@ "date_created": "2016-02-16T19:49:54.610Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + 
"order": 1 }, "model": "search.opinion", "pk": 111505 @@ -990,7 +994,8 @@ "date_created": "2016-02-16T19:49:54.629Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111924 @@ -1012,7 +1017,8 @@ "date_created": "2016-02-16T19:49:54.575Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112331 @@ -1034,7 +1040,8 @@ "date_created": "2016-02-16T19:49:54.537Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112646 @@ -1056,7 +1063,8 @@ "date_created": "2016-02-16T19:49:54.583Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112779 @@ -1078,7 +1086,8 @@ "date_created": "2016-02-16T19:49:54.592Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112874 @@ -1100,7 +1109,8 @@ "date_created": "2016-02-16T19:49:54.602Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 117967 @@ -1122,7 +1132,8 @@ "date_created": "2016-02-16T19:49:54.553Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 118377 @@ -1144,7 +1155,8 @@ "date_created": "2016-02-16T19:49:54.621Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 121168 @@ -1166,7 +1178,8 @@ "date_created": "2016-02-16T19:49:54.658Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 136984 @@ -1188,7 +1201,8 @@ "date_created": "2016-02-16T19:49:54.647Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 142900 @@ -1210,7 +1224,8 @@ "date_created": "2016-02-16T19:49:54.666Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 799990 @@ -1232,7 +1247,8 @@ "date_created": "2016-02-16T19:49:54.636Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 799993 @@ -1254,7 +1270,8 @@ "date_created": "2016-02-16T19:49:54.513Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2674862 From 05e9d9856b543579c28de371dcf8823c8ee7e666 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 15:23:23 -0400 Subject: [PATCH 009/372] fix(tests): Update fixtures for opinion model Take 2 --- .../fixtures/test_objects_query_counts.json | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cl/search/fixtures/test_objects_query_counts.json b/cl/search/fixtures/test_objects_query_counts.json index aa909b2fb2..b51117602a 100644 --- a/cl/search/fixtures/test_objects_query_counts.json +++ b/cl/search/fixtures/test_objects_query_counts.json @@ -300,7 +300,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"020lead" + "type":"020lead", + "order": 1 }, 
"model":"search.opinion", "pk":1 @@ -324,7 +325,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":2 @@ -348,7 +350,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":3 @@ -371,7 +374,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":4 @@ -395,7 +399,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":5 @@ -418,7 +423,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":6 From b0fc70a56055699c551b59a3ed38a005459905e3 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 16 Jun 2023 12:46:45 -0400 Subject: [PATCH 010/372] feat(models): Override django-ordered-model default By default it sorts by order - so if we dont want that feature we simply need to override the django order with a custom ordered manager in on the opinion class. (I think) --- cl/search/models.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cl/search/models.py b/cl/search/models.py index fc6aa75414..7fc2c03458 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,7 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations -from ordered_model.models import OrderedModel +from ordered_model.models import OrderedModel, OrderedModelManager from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,6 +2815,13 @@ def sort_cites(c): return 8 +class CustomOrderedManager(OrderedModelManager): + """Override the django ordered model default ordering""" + + def get_queryset(self): + return super().get_queryset().order_by() + + @pghistory.track(AfterUpdateOrDeleteSnapshot()) class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" @@ -2968,6 +2975,8 @@ class Opinion(OrderedModel, AbstractDateTimeModel): ) order_with_respect_to = "cluster" + objects = CustomOrderedManager() + @property def siblings(self) -> QuerySet: # These are other sub-opinions of the current cluster. 
From b8fa44563ac4bb42d6ad3020c604da8f8940f187 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 16 Jun 2023 13:44:51 -0400 Subject: [PATCH 011/372] fix(models): Different override for ordering on OP --- cl/search/models.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 7fc2c03458..be645bc5e8 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,7 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations -from ordered_model.models import OrderedModel, OrderedModelManager +from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,13 +2815,6 @@ def sort_cites(c): return 8 -class CustomOrderedManager(OrderedModelManager): - """Override the django ordered model default ordering""" - - def get_queryset(self): - return super().get_queryset().order_by() - - @pghistory.track(AfterUpdateOrDeleteSnapshot()) class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" @@ -2975,7 +2968,8 @@ class Opinion(OrderedModel, AbstractDateTimeModel): ) order_with_respect_to = "cluster" - objects = CustomOrderedManager() + class Meta: + ordering = () @property def siblings(self) -> QuerySet: From 7429eba0290bc2f931489b5799e90de318cd1512 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 28 Jul 2023 19:30:18 -0600 Subject: [PATCH 012/372] fix(poetry): Fix merge conflicts --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index e034727158..24dc7977e0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1072,6 +1072,17 @@ files = [ {file = "django_mathfilters-1.0.0-py3-none-any.whl", hash = "sha256:64200a21bb249fbf27be601d4bbb788779e09c6e063170c097cd82c4d18ebb83"}, ] +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -4690,4 +4701,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "10446165560282337aada87c0f3a9324dc904777bbfcc0f7e35db5c9d13a10a9" +content-hash = "7c0448e0852dba4f13177892cc0e619e2b58470f4d82707d8069fbeceb1cb919" diff --git a/pyproject.toml b/pyproject.toml index f9d568defa..2caee093ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ daphne = "^4.0.0" psycopg2 = "^2.9.6" juriscraper = "^2.5.51" httpx = {extras = ["http2"], version = "^0.24.1"} +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From f45a093c6b02ed5ae4a1077062295fa25f1c4894 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 31 Jul 2023 19:43:51 -0600 Subject: [PATCH 013/372] fix(models): Add 'order' field as default ordering for Opinion model Test added for django-ordered-model library Optimize imports in search/tests.py --- cl/search/models.py | 2 +- cl/search/tests.py | 69 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 5024bdcc3d..e50987c3f6 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -2986,7 +2986,7 @@ class Opinion(OrderedModel, AbstractDateTimeModel): order_with_respect_to = "cluster" class Meta: - ordering = () + ordering = ("order",) @property def siblings(self) -> QuerySet: diff --git a/cl/search/tests.py b/cl/search/tests.py index b5c71c9b16..c4edb24b31 100644 --- a/cl/search/tests.py +++ b/cl/search/tests.py @@ -5,7 +5,7 @@ from datetime import date from functools import reduce from pathlib import Path -from unittest import mock, skipUnless +from unittest import mock import pytz from asgiref.sync import sync_to_async @@ -19,9 +19,8 @@ from django.db import IntegrityError, transaction from django.http import HttpRequest from django.test import AsyncRequestFactory, override_settings -from django.test.utils import captured_stderr from django.urls import reverse -from elasticsearch_dsl import Q, connections +from elasticsearch_dsl import Q from factory import RelatedFactory from lxml import etree, html from rest_framework.status import HTTP_200_OK @@ -58,6 +57,7 @@ DocketFactory, OpinionClusterFactory, OpinionClusterFactoryWithChildrenAndParents, + OpinionFactory, OpinionsCitedWithParentsFactory, OpinionWithChildrenFactory, OpinionWithParentsFactory, @@ -283,6 +283,69 @@ def test_custom_manager_chained_filter(self) -> None: ) self.assertEqual(cluster_count, expected_count) + def test_opinions_order(self) -> None: + """Test django-ordered-model library""" + + # Create court + court = CourtFactory(id="nyappdiv") + + # Create cluster + cluster = OpinionClusterFactory( + case_name="Foo v. Bar", + case_name_short="Foo v. 
Bar", + docket=DocketFactory( + court=court, + ), + date_filed=date(1978, 3, 10), + source="U", + precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, + ) + + # Create three opinions + op_1 = OpinionFactory( + cluster=cluster, + type="Concurrence Opinion", + ) + + op_2 = OpinionFactory( + cluster=cluster, + type="Dissent", + ) + + op_3 = OpinionFactory( + cluster=cluster, + type="Lead Opinion", + ) + + # Test that the value of the order field matches the order in which + # they were created + self.assertEqual(op_1.order, 0) + self.assertEqual(op_2.order, 1) + self.assertEqual(op_3.order, 2) + + # Use library method to move lead opinion to first position, we can + # use this function to easily reorder existing opinions + op_3.to(0) + + # The position of the elements was modified, we refresh the objects + op_1.refresh_from_db() + op_2.refresh_from_db() + op_3.refresh_from_db() + + # Test new order + self.assertEqual(op_3.order, 0) + self.assertEqual(op_1.order, 1) + self.assertEqual(op_2.order, 2) + + # Add new opinion to cluster + op_4 = OpinionFactory( + cluster=cluster, + type="Dissent", + ) + + # Test that the new opinion is in last place + self.assertEqual(op_4.order, 3) + class DocketValidationTest(TestCase): @classmethod From 37dee19fcfacf95a79aac71c21ccc507d10289b4 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 11:27:50 -0600 Subject: [PATCH 014/372] fix(opinion_order): fix merge conflicts with main --- poetry.lock | 17 ++++++++++++++--- pyproject.toml | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index a23818c13e..9b7321deb1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1094,6 +1094,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2718,7 +2729,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.0 || >=3.4.0" +python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5091,4 +5102,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "5257a6d2a26b74054bac82d0c5700a55f1e2e2ec580608921e8a27a76d015f52" +content-hash = "46adbdc75bf4ad70aa4d6531f4d71a8f22f1e85ee9886408e921e7147aab7a36" diff --git a/pyproject.toml b/pyproject.toml index 87d6e90ff9..ef5970143f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,7 @@ juriscraper = "^2.5.51" httpx = {extras = ["http2"], version = "^0.24.1"} django-model-utils = "^4.3.1" inflection = "^0.5.1" # necessary for DRF schema generation - remove after drf-spectacular +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 3b4cb06ef8724d5052f9868f8d77388acfe18be1 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 11:55:03 -0600 Subject: [PATCH 015/372] fix(opinion_order): rename migrations --- .../{0019_order_opinions.py => 0020_order_opinions.py} | 2 +- .../{0019_order_opinions.sql => 0020_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0019_order_opinions.py => 0020_order_opinions.py} (99%) rename cl/search/migrations/{0019_order_opinions.sql => 0020_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0019_order_opinions.py b/cl/search/migrations/0020_order_opinions.py similarity index 99% rename from cl/search/migrations/0019_order_opinions.py rename to cl/search/migrations/0020_order_opinions.py index 5e446056cc..f614156360 100644 --- a/cl/search/migrations/0019_order_opinions.py +++ b/cl/search/migrations/0020_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0018_update_cluster_model"), + ("search", "0019_add_docket_source_noop"), ] operations = [ diff --git a/cl/search/migrations/0019_order_opinions.sql b/cl/search/migrations/0020_order_opinions.sql similarity index 100% rename from cl/search/migrations/0019_order_opinions.sql rename to cl/search/migrations/0020_order_opinions.sql From 878b9479e9c95b429b16c6bd044a2315b6cce3f3 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 17:11:30 -0600 Subject: [PATCH 016/372] feat(opinion_order): management command to update the order of harvard and columbia opinions --- .../commands/update_opinions_order.py | 598 ++++++++++++++++++ 1 file changed, 598 insertions(+) create mode 100644 cl/corpus_importer/management/commands/update_opinions_order.py diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py 
b/cl/corpus_importer/management/commands/update_opinions_order.py new file mode 100644 index 0000000000..f48de154a0 --- /dev/null +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -0,0 +1,598 @@ +import re +from typing import Any, Optional + +from bs4 import BeautifulSoup, NavigableString, Tag +from django.core.management import BaseCommand +from django.db.models import Count + +from cl.corpus_importer.utils import similarity_scores +from cl.lib.command_utils import logger +from cl.lib.string_diff import get_cosine_similarity +from cl.search.models import Opinion, OpinionCluster + +# TODO Should we add a flag to know that the cluster has been processed? + + +def match_text_lists( + file_opinions_list: list[str], cl_opinions_list: list[str] +) -> dict[int, Any]: + """Generate matching lists above threshold + :param file_opinions_list: Opinions from file + :param cl_opinions_list: CL opinions + :return: Matches if found or False + """ + # We import this here to avoid a circular import + from cl.corpus_importer.management.commands.harvard_opinions import ( + compare_documents, + ) + + scores = similarity_scores(file_opinions_list, cl_opinions_list) + + matches = {} + for i, row in enumerate(scores): + j = row.argmax() # type: ignore + # Lower threshold for small opinions. + if ( + get_cosine_similarity(file_opinions_list[i], cl_opinions_list[j]) + < 0.60 + ): + continue + percent_match = compare_documents( + file_opinions_list[i], cl_opinions_list[j] + ) + if percent_match < 60: + continue + matches[i] = j + + # Key is opinion position from file, Value is opinion position from cl opinion + # e.g. matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file + # opinion and 2 is cl opinion + return matches + + +def get_opinion_content( + cluster_id, +) -> tuple[Optional[str], list[dict], int, bool]: + """Get the opinions content for a cluster object + :param cluster_id: Cluster ID for a set of opinions + :return: (xml path, list of extracted opinions, start position, True if combined + opinions exists in cluster) + """ + cl_cleaned_opinions = [] + # by default the opinions are ordered by pk + opinions_from_cluster = Opinion.objects.filter( + cluster_id=cluster_id + ).order_by("id") + combined_opinions_cluster = opinions_from_cluster.filter( + type="010combined" + ) + xml_path = None + combined_opinion = False + if combined_opinions_cluster: + # the combined opinion will be displayed at beginning + start_position = combined_opinions_cluster.count() + combined_opinion = True + else: + # we don't have combined opinions, we start ordering from 0 to n + start_position = 0 + + for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): + if op.local_path and not xml_path: + xml_path = op.local_path + content = None + if len(op.html_with_citations) > 1: + content = op.html_with_citations + elif len(op.html_columbia) > 1: + content = op.html_columbia + elif len(op.html_lawbox) > 1: + content = op.html_lawbox + elif len(op.plain_text) > 1: + content = op.plain_text + elif len(op.html) > 1: + content = op.html + elif len(op.xml_harvard) > 1: + content = op.xml_harvard + if content: + soup = BeautifulSoup(content, features="html.parser") + prep_text = re.sub( + r"[^a-zA-Z0-9 ]", "", soup.getText(separator=" ").lower() + ) + prep_text = re.sub(" +", " ", prep_text) + cl_cleaned_opinions.append( + { + "id": op.id, + "byline": op.author_str, + "type": op.type, + "opinion": prep_text, + "order": i, + } + ) + + return xml_path, cl_cleaned_opinions, start_position, 
combined_opinion + + +def get_opinions_columbia_xml(xml_filepath: str) -> list: + """Convert xml data into dict + :param xml_filepath: path of xml file + :return: dict with data + """ + + SIMPLE_TAGS = [ + "attorneys", + "caption", + "citation", + "court", + "date", + "docket", + "hearing_date", + "panel", + "posture", + "reporter_caption", + ] + + data = {} # type: dict + + with open(xml_filepath, "r", encoding="utf-8") as f: + file_content = f.read() + + data["unpublished"] = False + + if "<opinion unpublished=true>" in file_content: + file_content = file_content.replace( + "<opinion unpublished=true>", "<opinion>" + ) + file_content = file_content.replace("<unpublished>", "").replace( + "</unpublished>", "" + ) + + data["unpublished"] = True + + # Sometimes opening and ending tag mismatch (e.g. c6b39dcb29c9c.xml) + file_content = file_content.replace( + "</footnote_body></block_quote>", "</block_quote></footnote_body>" + ) + + soup = BeautifulSoup(file_content, "lxml") + + # Find the outer <opinion> tag to have all elements inside + find_opinion = soup.find("opinion") + + step_one_opinions = [] # type: list + opinions = [] # type: list + order = 0 + + if find_opinion: + untagged_content = [] + + # We iterate all content, with and without tags + # STEP 1: Extract all content in multiple dict elements + for i, content in enumerate(find_opinion): # type: int, Tag + if type(content) == NavigableString: + # We found a raw string, store it + untagged_content.append(str(content)) + + else: + if content.name in SIMPLE_TAGS + [ + "citation_line", + "opinion_byline", + "dissent_byline", + "concurrence_byline", + ]: + # Ignore these tags, it will be processed later + continue + elif content.name in [ + "opinion_text", + "dissent_text", + "concurrence_text", + ]: + if untagged_content: + # We found something other than a navigable string that is + # not an opinion, but now we have found an opinion, + # let's create this content first + + # default type + op_type = "opinion" + if step_one_opinions: + if step_one_opinions[-1].get("type"): + # use type of previous opinion if exists + op_type = step_one_opinions[-1].get("type") + + # Get rid of double spaces + opinion_content = re.sub( + " +", " ", "\n".join(untagged_content) + ).strip() # type: str + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": "", + "type": op_type, + } + ) + order = order + 1 + untagged_content = [] + + byline = content.find_previous_sibling() + opinion_author = "" + if byline and "_byline" in byline.name: + opinion_author = byline.get_text() + + opinion_content = re.sub( + " +", " ", content.decode_contents() + ).strip() + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": opinion_author, + "type": content.name.replace("_text", ""), + } + ) + order = order + 1 + + else: + # Content not inside _text tag, we store it + untagged_content.append(str(content)) + + if untagged_content: + # default type + op_type = "opinion" + if step_one_opinions: + if step_one_opinions[-1].get("type"): + # use type of previous opinion if exists + op_type = step_one_opinions[-1].get("type") + + opinion_content = re.sub( + " +", " ", "\n".join(untagged_content) + ).strip() + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": "", + "type": op_type, + } + ) + + # Step 2: Merge found content in the xml file + new_order = 0 + authorless_content = [] + + for i, found_content in enumerate(step_one_opinions, start=1): + byline = found_content.get("byline") + if not byline: + # Opinion has no byline, store it
authorless_content.append(found_content) + + if byline: + # Opinion has byline + opinion_type = found_content.get("type") + opinion_content = found_content.get("opinion", "") + # Store content that doesn't match the current type + alternative_authorless_content = [ + z + for z in authorless_content + if z.get("type") != opinion_type + ] + # Keep content that matches the current type + authorless_content = [ + z + for z in authorless_content + if z.get("type") == opinion_type + ] + + if alternative_authorless_content: + # Keep floating text that are not from the same type, + # we need to create a separate opinion for those, + # for example: in 2713f39c5a8e8684.xml we have an opinion + # without an author, and the next opinion with an author is + # a dissent opinion, we can't combine both + + # We check if the previous stored opinion matches the type of the + # content + relevant_opinions = ( + [opinions[-1]] + if opinions + and opinions[-1]["type"] + == alternative_authorless_content[0].get("type") + else [] + ) + + if relevant_opinions: + previous_opinion = relevant_opinions[-1] + if previous_opinion.get( + "type" + ) == alternative_authorless_content[0].get("type"): + # Merge last opinion with previous opinion, it probably + # belongs the same author + relevant_opinions[-1][ + "opinion" + ] += "\n" + "\n".join( + [ + f.get("opinion") + for f in alternative_authorless_content + if f.get("opinion") + ] + ) + authorless_content = [] + + else: + # No relevant opinions found, create a new opinion + new_opinion = { + "byline": None, + "type": alternative_authorless_content[0].get( + "type" + ), + "opinion": "\n".join( + [ + f.get("opinion") + for f in alternative_authorless_content + if f.get("opinion") + ] + ), + "order": new_order, + } + new_order = new_order + 1 + opinions.append(new_opinion) + + # Add new opinion + new_opinion = { + "byline": byline, + "type": opinion_type, + "opinion": "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("type") == opinion_type + ] + ) + + "\n\n" + + opinion_content, + "order": new_order, + } + + opinions.append(new_opinion) + new_order = new_order + 1 + authorless_content = [] + + if len(step_one_opinions) == i and authorless_content: + # If is the last opinion, and we still have opinions without + # byline, create an opinion without an author and the contents + # that couldn't be merged + + # We check if the previous stored opinion matches the type of the + # content + relevant_opinions = ( + [opinions[-1]] + if opinions + and opinions[-1]["type"] + == authorless_content[0].get("type") + else [] + ) + + if relevant_opinions: + previous_opinion = relevant_opinions[-1] + if previous_opinion.get("type") == authorless_content[ + 0 + ].get("type"): + # Merge last opinion with previous opinion, it probably + # belongs the same author + relevant_opinions[-1]["opinion"] += "\n" + "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("opinion") + ] + ) + + else: + # Create last floating opinion + new_opinion = { + "byline": None, + "type": authorless_content[0].get("type"), + "opinion": "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("opinion") + ] + ), + "order": new_order, + } + opinions.append(new_opinion) + + for op in opinions: + opinion_content = op.get("opinion") + opinion_content = BeautifulSoup( + opinion_content, "html.parser" + ).getText() + opinion_content = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_content.lower()) + op["opinion"] = opinion_content + + return opinions + + +def 
run_harvard(): + """ + We assume that harvard data is already ordered, we just need to fill the order + field in each opinion + """ + + # Get all harvard clusters with more than one opinion + clusters = ( + OpinionCluster.objects.prefetch_related("sub_opinions") + .annotate(opinions_count=Count("sub_opinions")) + .filter(opinions_count__gt=1, source="U") + ) + # print(clusters.query) + print("clusters", len(clusters)) + + # cluster_id: 4697264, the combined opinion will go to the last position + for oc in clusters: + combined_opinions_cluster = oc.sub_opinions.filter( + type="010combined" + ).order_by("id") + if combined_opinions_cluster: + # the combined opinion will be displayed at first + start_position = combined_opinions_cluster.count() + else: + # we don't have combined opinions, we start ordering from 0 to n + start_position = 0 + + print("combined_opinions_cluster", combined_opinions_cluster) + for opinion_order, cluster_op in enumerate( + oc.sub_opinions.exclude(type="010combined").order_by("id"), + start=start_position, + ): + cluster_op.order = opinion_order + cluster_op.save() + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate(combined_opinions_cluster): + cluster_op.order = opinion_order + cluster_op.save() + + logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") + + +def run_columbia(): + """ + Update opinion order for columbia clusters + """ + + # Get all columbia cluster ids with more than one opinion + clusters = ( + OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) + .filter(opinions_count__gt=1, source="Z") + .order_by("id") + .values_list("id") + ) + + for cluster_id in clusters: + logger.info(f"Processing cluster id: {cluster_id}") + ( + xml_path, + cl_cleaned_opinions, + start_position, + combined_opinion, + ) = get_opinion_content(cluster_id) + + columbia_opinions = None + if xml_path: + columbia_opinions = get_opinions_columbia_xml(xml_path) + + if cl_cleaned_opinions and columbia_opinions: + matches = match_text_lists( + [op.get("opinion") for op in columbia_opinions], + [op.get("opinion") for op in cl_cleaned_opinions], + ) + + if matches: + if len(matches.values()) != len(set(matches.values())): + # We don't have a unique match for each opinion, they were + # probably combined incorrectly + logger.info( + f"We can't infer opinions order for cluster id: {cluster_id}" + ) + # Go to next cluster id + continue + + if len(cl_cleaned_opinions) > len(set(matches.values())): + # We have more opinions than matches + logger.info( + f"We couldn't match all cl opinions to the file's " + f"content, cluster id: {cluster_id}" + ) + # Go to next cluster id + continue + + failed = False + for file_pos, cl_pos in matches.items(): + # file_pos is the correct index to find the opinion id to update + file_opinion = columbia_opinions[file_pos] + # the order was calculated using the xml file + file_order = file_opinion.get("order") + start_position + cl_opinion = cl_cleaned_opinions[cl_pos] + opinion_id_to_update = cl_opinion.get("id") + + if opinion_id_to_update: + try: + # Save opinion + op = Opinion.objects.get(id=opinion_id_to_update) + op.order = file_order + op.save() + logger.info( + f"Cluster id processed: {cluster_id} Update opinion id: {opinion_id_to_update} with position: {file_order}" + ) + except Opinion.DoesNotExist: + logger.warning( + f"We can't update opinion, opinion doesn't exist with " + f"id: {opinion_id_to_update}" + ) + failed = True + break + else: + logger.warning( + f"We can't update opinion, 
empty opinion id " + f"from cluster: {cluster_id}" + ) + failed = True + break + + if combined_opinion and not failed: + combined_opinions_cluster = Opinion.objects.filter( + cluster_id=cluster_id, type="010combined" + ).order_by("id") + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate( + combined_opinions_cluster + ): + cluster_op.order = opinion_order + cluster_op.save() + + else: + # No matches found + logger.warning( + f"Failed to match opinions from cluster id: {cluster_id}" + ) + continue + + +class Command(BaseCommand): + help = "Fill order field in Opinion objects" + + def __init__(self, *args, **kwargs): + super(Command, self).__init__(*args, **kwargs) + + def add_arguments(self, parser): + parser.add_argument( + "--process-harvard", + action="store_true", + help="Fix harvard opinions order", + ) + + parser.add_argument( + "--process-columbia", + action="store_true", + help="Fix columbia opinions order", + ) + + def handle(self, *args, **options): + print("harvard", options["process_harvard"]) + print("columbia", options["process_columbia"]) + + if options["process_harvard"] and options["process_columbia"]: + print( + "You can only select one option process-harvard or process-columbia" + ) + return + + if options["process_harvard"]: + run_harvard() + + if options["process_columbia"]: + run_columbia() From c3a5c4a2a0ad002b075ea69b3a0757bbef684a1f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:13:02 -0600 Subject: [PATCH 017/372] feat(opinion_order): exception when xml file not found --- .../commands/update_opinions_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index f48de154a0..0560c506ba 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -4,6 +4,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand from django.db.models import Count +from django.db.models.fields.files import FieldFile from cl.corpus_importer.utils import similarity_scores from cl.lib.command_utils import logger @@ -52,7 +53,7 @@ def match_text_lists( def get_opinion_content( cluster_id, -) -> tuple[Optional[str], list[dict], int, bool]: +) -> tuple[Optional[FieldFile], list[dict], int, bool]: """Get the opinions content for a cluster object :param cluster_id: Cluster ID for a set of opinions :return: (xml path, list of extracted opinions, start position, True if combined @@ -78,6 +79,8 @@ def get_opinion_content( for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): if op.local_path and not xml_path: + # We store the field because we are using S3 for storage and that backend + # doesn't support absolute paths xml_path = op.local_path content = None if len(op.html_with_citations) > 1: @@ -111,7 +114,7 @@ def get_opinion_content( return xml_path, cl_cleaned_opinions, start_position, combined_opinion -def get_opinions_columbia_xml(xml_filepath: str) -> list: +def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: """Convert xml data into dict :param xml_filepath: path of xml file :return: dict with data @@ -132,8 +135,8 @@ def get_opinions_columbia_xml(xml_filepath: str) -> list: data = {} # type: dict - with open(xml_filepath, "r", encoding="utf-8") as f: - file_content = f.read() + with 
xml_filepath.open("r") as f: + file_content = f.read().decode("utf-8") data["unpublished"] = False @@ -432,6 +435,7 @@ def run_harvard(): # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: + logger.info(f"Processing cluster id: {oc}") combined_opinions_cluster = oc.sub_opinions.filter( type="010combined" ).order_by("id") @@ -468,7 +472,7 @@ def run_columbia(): OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1, source="Z") .order_by("id") - .values_list("id") + .values_list("id", flat=True) ) for cluster_id in clusters: @@ -482,7 +486,11 @@ def run_columbia(): columbia_opinions = None if xml_path: - columbia_opinions = get_opinions_columbia_xml(xml_path) + try: + columbia_opinions = get_opinions_columbia_xml(xml_path) + except FileNotFoundError: + logger.warning(f"Xml file not found, cluster id: {cluster_id}") + continue if cl_cleaned_opinions and columbia_opinions: matches = match_text_lists( From 6ba8d3d3b1048ba4dfaf79ef60b72bf5fff8e55f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:37:30 -0600 Subject: [PATCH 018/372] feat(opinion_order): add param to resume command to order opinions --- .../commands/update_opinions_order.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 0560c506ba..d4d915695d 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -82,6 +82,7 @@ def get_opinion_content( # We store the field because we are using S3 for storage and that backend # doesn't support absolute paths xml_path = op.local_path + # print("url", op.local_path.url) content = None if len(op.html_with_citations) > 1: content = op.html_with_citations @@ -136,7 +137,7 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict with xml_filepath.open("r") as f: - file_content = f.read().decode("utf-8") + file_content = f.read() data["unpublished"] = False @@ -418,10 +419,11 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: return opinions -def run_harvard(): +def run_harvard(start_id: int): """ We assume that harvard data is already ordered, we just need to fill the order field in each opinion + :param start_id: skip any id lower than this value """ # Get all harvard clusters with more than one opinion @@ -429,9 +431,11 @@ def run_harvard(): OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1, source="U") + .order_by("id") ) - # print(clusters.query) - print("clusters", len(clusters)) + + if start_id: + clusters = clusters.filter(pk__gte=start_id) # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: @@ -446,7 +450,6 @@ def run_harvard(): # we don't have combined opinions, we start ordering from 0 to n start_position = 0 - print("combined_opinions_cluster", combined_opinions_cluster) for opinion_order, cluster_op in enumerate( oc.sub_opinions.exclude(type="010combined").order_by("id"), start=start_position, @@ -462,9 +465,10 @@ def run_harvard(): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(): +def run_columbia(start_id: int): """ Update opinion order for columbia clusters + :param start_id: skip any id lower than this value 
""" # Get all columbia cluster ids with more than one opinion @@ -475,6 +479,9 @@ def run_columbia(): .values_list("id", flat=True) ) + if start_id: + clusters = filter(lambda x: x >= start_id, clusters) + for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") ( @@ -589,10 +596,14 @@ def add_arguments(self, parser): help="Fix columbia opinions order", ) - def handle(self, *args, **options): - print("harvard", options["process_harvard"]) - print("columbia", options["process_columbia"]) + parser.add_argument( + "--start-id", + type=int, + default=0, + help="Skip any id lower than this value", + ) + def handle(self, *args, **options): if options["process_harvard"] and options["process_columbia"]: print( "You can only select one option process-harvard or process-columbia" @@ -600,7 +611,7 @@ def handle(self, *args, **options): return if options["process_harvard"]: - run_harvard() + run_harvard(options["start_id"]) if options["process_columbia"]: - run_columbia() + run_columbia(options["start_id"]) From 71ec6241cc0c06d4aaebfb71a0cec188eb39a11a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:47:56 -0600 Subject: [PATCH 019/372] feat(opinion_order): add new param for command --- .../commands/update_opinions_order.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index d4d915695d..7a46530a82 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -11,8 +11,6 @@ from cl.lib.string_diff import get_cosine_similarity from cl.search.models import Opinion, OpinionCluster -# TODO Should we add a flag to know that the cluster has been processed? 
- def match_text_lists( file_opinions_list: list[str], cl_opinions_list: list[str] @@ -419,11 +417,12 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: return opinions -def run_harvard(start_id: int): +def run_harvard(start_id: int, end_id: int): """ We assume that harvard data is already ordered, we just need to fill the order field in each opinion :param start_id: skip any id lower than this value + :param end_id: skip any id greater than this value """ # Get all harvard clusters with more than one opinion @@ -437,6 +436,9 @@ def run_harvard(start_id: int): if start_id: clusters = clusters.filter(pk__gte=start_id) + if end_id: + clusters = clusters.filter(pk__lte=end_id) + # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: logger.info(f"Processing cluster id: {oc}") @@ -465,10 +467,11 @@ def run_harvard(start_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int): +def run_columbia(start_id: int, end_id: int): """ Update opinion order for columbia clusters :param start_id: skip any id lower than this value + :param end_id: skip any id greater than this value """ # Get all columbia cluster ids with more than one opinion @@ -482,6 +485,9 @@ def run_columbia(start_id: int): if start_id: clusters = filter(lambda x: x >= start_id, clusters) + if end_id: + clusters = filter(lambda x: x <= end_id, clusters) + for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") ( @@ -600,7 +606,14 @@ def add_arguments(self, parser): "--start-id", type=int, default=0, - help="Skip any id lower than this value", + help="Start id for a range of clusters (inclusive)", + ) + + parser.add_argument( + "--end-id", + type=int, + default=0, + help="End id for a range of clusters (inclusive)", ) def handle(self, *args, **options): @@ -611,7 +624,7 @@ def handle(self, *args, **options): return if options["process_harvard"]: - run_harvard(options["start_id"]) + run_harvard(options["start_id"], options["end_id"]) if options["process_columbia"]: - run_columbia(options["start_id"]) + run_columbia(options["start_id"], options["end_id"]) From f4615b07d931f93b7a2409438d17f85d6582f4a9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 31 Aug 2023 13:35:04 -0600 Subject: [PATCH 020/372] feat(opinion_order): update typing --- .../commands/update_opinions_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 7a46530a82..480f2ef6d6 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,5 +1,5 @@ import re -from typing import Any, Optional +from typing import Any, List, Optional from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand @@ -13,12 +13,12 @@ def match_text_lists( - file_opinions_list: list[str], cl_opinions_list: list[str] -) -> dict[int, Any]: + file_opinions_list: List[Any], cl_opinions_list: List[Any] +) -> dict[int, int]: """Generate matching lists above threshold :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions - :return: Matches if found or False + :return: Matches if found or empty dict """ # We import this here to avoid a circular import from cl.corpus_importer.management.commands.harvard_opinions import ( @@ -507,8 +507,16 @@ def 
run_columbia(start_id: int, end_id: int): if cl_cleaned_opinions and columbia_opinions: matches = match_text_lists( - [op.get("opinion") for op in columbia_opinions], - [op.get("opinion") for op in cl_cleaned_opinions], + [ + op.get("opinion") + for op in columbia_opinions + if op.get("opinion") + ], + [ + op.get("opinion") + for op in cl_cleaned_opinions + if op.get("opinion") + ], ) if matches: From 3ceff218c23c77201b3b78fd7bda838db09a2706 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 31 Aug 2023 14:30:00 -0600 Subject: [PATCH 021/372] feat(opinion_order): temporary read xml files from s3 it requires to change the AWS_STORAGE_BUCKET_NAME env variable to read files from private storage --- .../management/commands/update_opinions_order.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 480f2ef6d6..0b96a5dae1 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -134,6 +134,16 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict + if "/home/mlissner" in str(xml_filepath): + # Temporary replace the path with the correct from S3, this way we read them + # directly from S3, we need the files in /sources/columbia/opinions/ in + # com-courtlistener-storage bucket + # TODO discuss this + xml_filepath.name = xml_filepath.name.replace( + "/home/mlissner", "/sources" + ) + + # print(f"Opening {xml_filepath.url}") with xml_filepath.open("r") as f: file_content = f.read() @@ -502,7 +512,9 @@ def run_columbia(start_id: int, end_id: int): try: columbia_opinions = get_opinions_columbia_xml(xml_path) except FileNotFoundError: - logger.warning(f"Xml file not found, cluster id: {cluster_id}") + logger.warning( + f"Xml file not found in {xml_path}, cluster id: {cluster_id}" + ) continue if cl_cleaned_opinions and columbia_opinions: From 0bd9b9ac3bf2a511633d93de0bdebc49da06ca5d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 5 Sep 2023 15:52:59 -0600 Subject: [PATCH 022/372] feat(update_opinions_order): argument added to point to the mounted directory with xml files --- .../commands/update_opinions_order.py | 47 +++++++++++++------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 0b96a5dae1..f6c72811d8 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,3 +1,4 @@ +import os.path import re from typing import Any, List, Optional @@ -80,7 +81,6 @@ def get_opinion_content( # We store the field because we are using S3 for storage and that backend # doesn't support absolute paths xml_path = op.local_path - # print("url", op.local_path.url) content = None if len(op.html_with_citations) > 1: content = op.html_with_citations @@ -113,9 +113,10 @@ def get_opinion_content( return xml_path, cl_cleaned_opinions, start_position, combined_opinion -def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: +def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: """Convert xml data into dict :param xml_filepath: path of xml file + :param xml_dir: absolute path to the directory with columbia xml files :return: dict with data """ @@ -134,17 
+135,17 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict - if "/home/mlissner" in str(xml_filepath): - # Temporary replace the path with the correct from S3, this way we read them - # directly from S3, we need the files in /sources/columbia/opinions/ in - # com-courtlistener-storage bucket - # TODO discuss this - xml_filepath.name = xml_filepath.name.replace( - "/home/mlissner", "/sources" + if "/home/mlissner/columbia/opinions/" in str(xml_filepath): + filepath = str( + xml_filepath.name.replace("/home/mlissner/columbia/opinions/", "") ) + # fix file path temporarily + new_xml_filepath = os.path.join(xml_dir, filepath) + else: + logger.info(f"Can't fix xml file path: {xml_filepath}") + raise FileNotFoundError - # print(f"Opening {xml_filepath.url}") - with xml_filepath.open("r") as f: + with open(new_xml_filepath, "r", encoding="utf-8") as f: file_content = f.read() data["unpublished"] = False @@ -477,11 +478,12 @@ def run_harvard(start_id: int, end_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int, end_id: int): +def run_columbia(start_id: int, end_id: int, xml_dir: str): """ Update opinion order for columbia clusters :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value + :param xml_dir: absolute path to the directory with columbia xml files """ # Get all columbia cluster ids with more than one opinion @@ -510,7 +512,9 @@ def run_columbia(start_id: int, end_id: int): columbia_opinions = None if xml_path: try: - columbia_opinions = get_opinions_columbia_xml(xml_path) + columbia_opinions = get_opinions_columbia_xml( + xml_path, xml_dir + ) except FileNotFoundError: logger.warning( f"Xml file not found in {xml_path}, cluster id: {cluster_id}" @@ -622,6 +626,12 @@ def add_arguments(self, parser): help="Fix columbia opinions order", ) + parser.add_argument( + "--xml-dir", + required=False, + help="The absolute path to the directory with columbia xml files", + ) + parser.add_argument( "--start-id", type=int, @@ -646,5 +656,12 @@ def handle(self, *args, **options): if options["process_harvard"]: run_harvard(options["start_id"], options["end_id"]) - if options["process_columbia"]: - run_columbia(options["start_id"], options["end_id"]) + if options["process_columbia"] and options["xml_dir"]: + run_columbia( + options["start_id"], options["end_id"], options["xml_dir"] + ) + + if options["process_columbia"] and not options["xml_dir"]: + print( + "Argument --xml-dir required to read xml files from mounted directory" + ) From 7b16b42d99c1f8b9076d47ddb0ba916df21b564a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 5 Sep 2023 16:09:04 -0600 Subject: [PATCH 023/372] feat(update_opinions_order): fix mypy error --- .../management/commands/update_opinions_order.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index f6c72811d8..05a1bdb7f5 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -136,8 +136,8 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: data = {} # type: dict if "/home/mlissner/columbia/opinions/" in str(xml_filepath): - filepath = str( - xml_filepath.name.replace("/home/mlissner/columbia/opinions/", "") + filepath = str(xml_filepath).replace( + 
"/home/mlissner/columbia/opinions/", "" ) # fix file path temporarily new_xml_filepath = os.path.join(xml_dir, filepath) From d49708adfacecfa075dd3a298a8cdc867532c008 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 21 Sep 2023 13:27:05 -0600 Subject: [PATCH 024/372] fix(opinion_order): Update poetry.lock --- poetry.lock | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 90d12b08bf..a7f14d94cb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1097,6 +1097,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2577,6 +2588,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, 
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2721,7 +2742,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5096,4 +5117,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "96bb211d8a53b99b00d7d118fd7f90f35dcf27b9a940532d8ea814eecc5cbd6b" +content-hash = "6ce30a4f34302d7e0ca29bf1f9794ad2fc1759cef8312bcfebb5550a33cb0019" From 9ae8dc891f764a471729dc8131e0e071bd9f9e7c Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Sep 2023 17:15:04 -0600 Subject: [PATCH 025/372] fix(opinion_order): Update poetry.lock --- poetry.lock | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index c9b8295b88..a7fe6b3511 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1097,6 +1097,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2577,6 +2588,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -5096,4 +5117,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "4b906615444a53e1a26780aa6a3742c0e7844c307c6a991b059ee4de0cb177a8" +content-hash = "6da7f3d3b926ac02caf9720eda2b6c81ae71fe04aafb6a0a35f83e52b4c412cc" From 7702a082063ed2b80b6f803a1a6afa7af6347887 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Sep 2023 18:03:45 -0600 Subject: [PATCH 026/372] fix(opinion_order): Rename migrations --- .../{0020_order_opinions.py => 0022_order_opinions.py} | 2 +- .../{0020_order_opinions.sql => 0022_order_opinions.sql} | 0 2 files changed, 1 
insertion(+), 1 deletion(-) rename cl/search/migrations/{0020_order_opinions.py => 0022_order_opinions.py} (99%) rename cl/search/migrations/{0020_order_opinions.sql => 0022_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0020_order_opinions.py b/cl/search/migrations/0022_order_opinions.py similarity index 99% rename from cl/search/migrations/0020_order_opinions.py rename to cl/search/migrations/0022_order_opinions.py index f614156360..763c98e8fc 100644 --- a/cl/search/migrations/0020_order_opinions.py +++ b/cl/search/migrations/0022_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0019_add_docket_source_noop"), + ("search", "0021_add_pghistory_courthouse"), ] operations = [ diff --git a/cl/search/migrations/0020_order_opinions.sql b/cl/search/migrations/0022_order_opinions.sql similarity index 100% rename from cl/search/migrations/0020_order_opinions.sql rename to cl/search/migrations/0022_order_opinions.sql From 3f173fef6ac191d2c1a0b43f38de3917a9f9b9bf Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 29 Nov 2023 13:08:52 -0600 Subject: [PATCH 027/372] fix(opinions_order): rename migrations update poetry.lock --- ...der_opinions.py => 0024_order_opinions.py} | 2 +- ...r_opinions.sql => 0024_order_opinions.sql} | 0 poetry.lock | 30 ++++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) rename cl/search/migrations/{0022_order_opinions.py => 0024_order_opinions.py} (99%) rename cl/search/migrations/{0022_order_opinions.sql => 0024_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0022_order_opinions.py b/cl/search/migrations/0024_order_opinions.py similarity index 99% rename from cl/search/migrations/0022_order_opinions.py rename to cl/search/migrations/0024_order_opinions.py index 763c98e8fc..1abaed4d76 100644 --- a/cl/search/migrations/0022_order_opinions.py +++ b/cl/search/migrations/0024_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0021_add_pghistory_courthouse"), + ("search", "0023_add_docket_sources_noop"), ] operations = [ diff --git a/cl/search/migrations/0022_order_opinions.sql b/cl/search/migrations/0024_order_opinions.sql similarity index 100% rename from cl/search/migrations/0022_order_opinions.sql rename to cl/search/migrations/0024_order_opinions.sql diff --git a/poetry.lock b/poetry.lock index cdb46a7ef7..30080de3f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,6 +1101,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -1493,18 +1504,6 @@ files = [ {file = "fast_diff_match_patch-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c4cb3aa60664bcafd070915cc0f148c63da3a20babeca29bdf24e6aee80ff481"}, {file = "fast_diff_match_patch-2.0.1-cp310-cp310-win32.whl", hash = "sha256:3423c373c168fcbc56fa488960248ce086dd686402817aa5d4d967537fff1203"}, {file = "fast_diff_match_patch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:f8b5595277f99b4908ae9bab33548bfe7497a99a1f5dc5c277a4f36051dcf993"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a682a72b93e07902b9af3bc591fe365da4024888cceb308f04cdec59eeb3602d"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30e7fb0de87e02db88cda54f6c57a9f7d789e4d0922cfed41f61a1d4415408b"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:58b273cecb941bef392bda622a534de03e6ea8d3186d4d07745375cce9db0833"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e39bb9ca0b7632a15e85cb6b0c4c575010e6fb6e43e5714ee53c7cef1aa4135"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-win32.whl", hash = "sha256:b4d4e6aa5c6a4af0b6c66be593021579f4693c94b848084b89e6783180361db6"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:c1154830dbcb83d1c9ed24f43b1e8226cafc7ce46b6e0971e866bdf513ecc216"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6723cfba7bd9fb712e179acbc9c6cb526076612c0325ad4f1066f3bd176064a"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:378251cc37cd21d14802669a3453f026ed3aa07c07a8aa2daabeefd14a0e0a36"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a2e1ce344438b14400a91b65c79c39345b0ce70a0a8797e88b14485577b5fc0"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cc7285d9a1fbf8990361ce37728202fd6ebee6ddc6cfe6fb15a19905e562f304"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-win32.whl", hash = "sha256:3aaeb207fe586979ecb194ecc2c81ba979d351cd0bdaba8489ce4be0f55206dc"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:4d759ec2d79c638407f32c29dc348fcef6e6a1659927056527b0939a1ab31ca5"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e5205e4f3b820f65138947e0d42959b6910fd959c8e5e8f4fc72472f6fec9d8b"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa1212d0200169e93392805957ca6ae351bfc51282c5119fb231f968c7e12fbc"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d30a9db041dfee960a9c8a35fa99685b1f29530f52f69fef1e3cc02867f0b9"}, @@ -1545,9 +1544,6 @@ files = [ {file = "fast_diff_match_patch-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:58ada748637821445df3cfcb21df412136fb69b8e677ea364aa9ca7a8facb048"}, {file = 
"fast_diff_match_patch-2.0.1-cp39-cp39-win32.whl", hash = "sha256:b07808e98f0bfcd557281126135b24729a30ee10ccc2db4d3358fb2f18ac1879"}, {file = "fast_diff_match_patch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:6f2202d1e9d225918ea3803f66ca9c99d080c8ba5094c438680eb2c8dfd2e48c"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ecff01b3d10d6bed965a1591e37597df118ab0bcc98a3f59a724a0d9bd63fb1"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a92ba0d543524234a17ea2da4892a9752273cfdfed528e581f0f76cbd78cf991"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd5b3b99bb7c14ce8ea5ab184afb2cc6796dac71439b2cfc6fb6227a6846aef3"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:daa821a8dcbc1026f7f8cc177ca599bcfbaaddccdf90bc1ad1e44255b1c239e1"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27de6dc97e7d6dc207585d778ace58e7cc364b8383e5412164224d52ad4099b5"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec27f797b1ecee79c3d76c9a081a6c20fd89068b41ba3b84a6ebe48317c5c46c"}, @@ -2750,7 +2746,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5212,4 +5208,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "f3edde54a6877b5506669d8d8354b28d8b7c6dffbb08c4b0954079680cec63dc" +content-hash = "ce20135f86ae0bc9264359886c298076a90c74d5a30256f7db4541812ffb4f76" From 9dedd433ca589f7db5f4d71edd7318fbd34e3aa8 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 29 Nov 2023 19:14:00 -0600 Subject: [PATCH 028/372] fix(opinions_order): code refactored NOTE: functions found in columbia_utils.py and utils.py, were temporarily added in the command,when the necessary changes are combined we need to remove the functions and import them from the utils. 
--- .../commands/update_opinions_order.py | 833 ++++++++++-------- 1 file changed, 461 insertions(+), 372 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 05a1bdb7f5..ae931ba4b7 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -5,18 +5,356 @@ from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand from django.db.models import Count -from django.db.models.fields.files import FieldFile from cl.corpus_importer.utils import similarity_scores from cl.lib.command_utils import logger from cl.lib.string_diff import get_cosine_similarity -from cl.search.models import Opinion, OpinionCluster +from cl.search.models import SOURCES, Opinion, OpinionCluster + +VALID_COLUMBIA_SOURCES = [ + key + for key in dict(SOURCES.NAMES).keys() + if SOURCES.COLUMBIA_ARCHIVE in key +] + +VALID_HARVARD_SOURCES = [ + key for key in dict(SOURCES.NAMES).keys() if SOURCES.HARVARD_CASELAW in key +] + + +# TODO remove the functions below and import them from utils.py and columbia_utils.py when those changes get merged + + +SIMPLE_TAGS = [ + "attorneys", + "caption", + "citation", + "court", + "date", + "docket", + "hearing_date", + "panel", + "posture", + "reporter_caption", +] + + +class EmptyOpinionException(Exception): + """An exception for opinions that raise a ZeroDivisionError Exception due to an empty + opinion tag or empty opinion content in cl""" + + def __init__(self, message: str) -> None: + self.message = message + + +def read_xml_to_soup(filepath: str) -> BeautifulSoup: + """This function reads the xml file, fixes the bad tags in columbia xml + files and returns a BeautifulSoup object + + :param filepath: path to xml file + :return: BeautifulSoup object of parsed content + """ + with open(filepath, "r", encoding="utf-8") as f: + file_content = f.read() + # Sometimes the opening and ending tags mismatch (e.g. 
ed7c6b39dcb29c9c.xml) + file_content = file_content.replace( + "", "" + ) + # Fix opinion with invalid attribute + if "" in file_content: + file_content = file_content.replace( + "", "" + ) + file_content = file_content.replace("", "").replace( + "", "" + ) + return BeautifulSoup(file_content, "lxml") + + +def add_floating_opinion( + opinions: list, floating_content: list, opinion_order: int +) -> list: + """We have found floating opinions in the bs object; we keep the opinion + content as a new opinion + + :param opinions: a list with opinions found + :param floating_content: content that is not in known non-opinion tags + :param opinion_order: opinion position + :return: updated list of opinions + """ + op_type = "opinion" + if opinions: + if opinions[-1].get("type"): + # Use type of previous opinion if exists + op_type = opinions[-1].get("type") + + # Get rid of double spaces from floating content + opinion_content = re.sub( + " +", " ", "\n".join(floating_content) + ).strip() # type: str + if opinion_content: + opinions.append( + { + "opinion": opinion_content, + "order": opinion_order, + "byline": "", + "type": op_type, + } + ) + return opinions + + +def extract_columbia_opinions( + outer_opinion: BeautifulSoup, +) -> list[Optional[dict]]: + """We extract all possible opinions from BeautifulSoup, with and without + author, and we create new opinions if floating content exists (content that + is not explicitly defined within an opinion tag or doesn't have an author) + + :param outer_opinion: element containing all xml tags + :return: list of opinion dicts + """ + opinions: list = [] + floating_content = [] + order = 0 + + # We iterate all content to look for all possible opinions + for i, content in enumerate(outer_opinion): # type: int, Tag + if isinstance(content, NavigableString): + # We found a raw string, store it + floating_content.append(str(content)) + else: + if content.name in SIMPLE_TAGS + [ + "citation_line", + "opinion_byline", + "dissent_byline", + "concurrence_byline", + ]: + # Ignore these tags; they will be processed later + continue + elif content.name in [ + "opinion_text", + "dissent_text", + "concurrence_text", + ]: + if floating_content: + # We have found an opinion, but there is floating + # content, we create a dict with the opinion using the + # floating content with default type = "opinion" + opinions = add_floating_opinion( + opinions, floating_content, order + ) + floating_content = [] + + byline = content.find_previous_sibling() + opinion_author = "" + if byline and "_byline" in byline.name: + opinion_author = byline.get_text() + + opinion_content = re.sub( + " +", " ", content.decode_contents() + ).strip() + if opinion_content: + # Now we create a dict with current opinion + opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": opinion_author, + "type": content.name.replace("_text", ""), + } + ) + order = order + 1 + + else: + if content.name not in SIMPLE_TAGS + ["syllabus"]: + # We store content that is not inside a _text tag and is + # not in one of the known non-opinion tags + floating_content.append(str(content)) + + # Combine any remaining floating content into one more opinion.
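+ # Illustrative shape of an extracted entry at this point (sample values are + # invented): {"opinion": "<p>...</p>", "order": 1, "byline": "", + # "type": "dissent"}; map_opinion_types() later converts "type" into the + # model choices, e.g. "dissent" becomes "040dissent".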
+ if floating_content: + # If we have gone through all the found opinions and we still + # have floating content left over, we create a new opinion with the + # last type of opinion + opinions = add_floating_opinion(opinions, floating_content, order) + return opinions + + +def is_per_curiam_opinion( + content: Optional[str], byline: Optional[str] +) -> bool: + """Check if opinion author is per curiam + :param content: opinion content + :param byline: opinion text author + :return: True if opinion author is per curiam + """ + if byline and "per curiam" in byline[:1000].lower(): + return True + if content and "per curiam" in content[:1000].lower(): + return True + return False + + +def merge_opinions( + opinions: list, content: list, current_order: int +) -> tuple[list, int]: + """Merge the new content into the last opinion if both are of the same type, or + create a new opinion if merging is not possible + + :param opinions: list of opinions that is being updated constantly + :param content: list of opinions without an author + :param current_order: opinion position + :return: updated list of opinions + """ + + # We check if the previously stored opinion matches the type of the + # content, and we store the opinion dict temporarily + relevant_opinions = ( + [opinions[-1]] + if opinions and opinions[-1]["type"] == content[0].get("type") + else [] + ) + + if relevant_opinions: + relevant_opinions[-1]["opinion"] += "\n" + "\n".join( + [f.get("opinion") for f in content if f.get("opinion")] + ) + + else: + # No relevant opinions found, create a new opinion with the content + opinion_content = "\n".join( + [f.get("opinion") for f in content if f.get("opinion")] + ) + new_opinion = { + "byline": None, + "type": content[0].get("type"), + "opinion": opinion_content, + "order": current_order, + "per_curiam": is_per_curiam_opinion(opinion_content, None), + } + opinions.append(new_opinion) + current_order = current_order + 1 + + return opinions, current_order + + +def process_extracted_opinions(extracted_opinions: list) -> list: + """We read the data extracted by the extract_columbia_opinions function to merge all + possible floating opinions (content that is not explicitly defined within an opinion + tag or doesn't have an author) + + :param extracted_opinions: list of opinions obtained from xml file + :return: a list with extracted and processed opinions + """ + + opinions: list = [] + authorless_content = [] + order = 0 + + for i, found_content in enumerate(extracted_opinions, start=1): + byline = found_content.get("byline") + if not byline: + # Opinion has no byline, store opinion content + authorless_content.append(found_content) + + if byline: + # Opinion has byline, get opinion type and content + opinion_type = found_content.get("type") + opinion_content = found_content.get("opinion", "") + # Store content that doesn't match the current opinion type + alternative_authorless_content = [ + content + for content in authorless_content + if content.get("type") != opinion_type + ] + # Keep content that matches the current type + authorless_content = [ + op_content + for op_content in authorless_content + if op_content.get("type") == opinion_type + ] + + if alternative_authorless_content: + # Keep floating text that is not of the same type; + # we need to create a separate opinion for it, + # for example: in 2713f39c5a8e8684.xml we have an opinion + # without an author, and the next opinion with an author is + # a dissent opinion, we can't combine both + opinions, order = merge_opinions( + opinions, alternative_authorless_content, order + )
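+ # Worked example (hypothetical): if authorless_content held an "opinion" + # fragment and a "dissent" fragment while the current byline belongs to a + # dissent, the "opinion" fragment was split into its own opinion by + # merge_opinions() above, and the "dissent" fragment is prepended to the + # authored dissent below.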
+ + opinion_content = ( + "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("type") == opinion_type + ] + ) + + "\n\n" + + opinion_content + ) + + # Add new opinion + new_opinion = { + "byline": byline, + "type": opinion_type, + "opinion": opinion_content, + "order": order, + "per_curiam": is_per_curiam_opinion(opinion_content, byline), + } + + opinions.append(new_opinion) + order = order + 1 + authorless_content = [] + + if len(extracted_opinions) == i and authorless_content: + # If is the last opinion, and we still have opinions without + # byline, create an opinion without an author and the contents + # that couldn't be merged + opinions, order = merge_opinions( + opinions, authorless_content, order + ) + + return opinions + + +def map_opinion_types(opinions=None) -> None: + """Map opinion type to model field choice + + :param opinions: a list that contains all opinions as dict elements + :return: None + """ + + if opinions is None: + opinions = [] + lead = False + for op in opinions: + op_type = op.get("type") + # Only first opinion with "opinion" type is a lead opinion, the next + # opinion with "opinion" type is an addendum + if not lead and op_type and op_type == "opinion": + lead = True + op["type"] = "020lead" + continue + elif lead and op_type and op_type == "opinion": + op["type"] = "050addendum" + elif op_type and op_type == "dissent": + op["type"] = "040dissent" + elif op_type and op_type == "concurrence": + op["type"] = "030concurrence" + + +# TODO ------------------------ remove until here ------------------------------- def match_text_lists( file_opinions_list: List[Any], cl_opinions_list: List[Any] ) -> dict[int, int]: """Generate matching lists above threshold + :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions :return: Matches if found or empty dict @@ -50,10 +388,11 @@ def match_text_lists( return matches -def get_opinion_content( +def get_opinions_cleaned_content( cluster_id, -) -> tuple[Optional[FieldFile], list[dict], int, bool]: - """Get the opinions content for a cluster object +) -> tuple[Optional[str], list[dict], int, bool]: + """Get cleaned opinions content for a cluster object + :param cluster_id: Cluster ID for a set of opinions :return: (xml path, list of extracted opinions, start position, True if combined opinions exists in cluster) @@ -67,380 +406,108 @@ def get_opinion_content( type="010combined" ) xml_path = None - combined_opinion = False + cluster_has_combined_opinion = False if combined_opinions_cluster: # the combined opinion will be displayed at beginning start_position = combined_opinions_cluster.count() - combined_opinion = True + cluster_has_combined_opinion = True else: # we don't have combined opinions, we start ordering from 0 to n start_position = 0 for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): if op.local_path and not xml_path: - # We store the field because we are using S3 for storage and that backend - # doesn't support absolute paths - xml_path = op.local_path - content = None - if len(op.html_with_citations) > 1: - content = op.html_with_citations - elif len(op.html_columbia) > 1: - content = op.html_columbia - elif len(op.html_lawbox) > 1: - content = op.html_lawbox - elif len(op.plain_text) > 1: - content = op.plain_text - elif len(op.html) > 1: - content = op.html - elif len(op.xml_harvard) > 1: - content = op.xml_harvard - if content: - soup = BeautifulSoup(content, features="html.parser") - prep_text = re.sub( - r"[^a-zA-Z0-9 ]", "", 
soup.getText(separator=" ").lower() - ) - prep_text = re.sub(" +", " ", prep_text) - cl_cleaned_opinions.append( - { - "id": op.id, - "byline": op.author_str, - "type": op.type, - "opinion": prep_text, - "order": i, - } - ) - - return xml_path, cl_cleaned_opinions, start_position, combined_opinion - + xml_path = str(op.local_path) -def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: - """Convert xml data into dict - :param xml_filepath: path of xml file - :param xml_dir: absolute path to the directory with columbia xml files - :return: dict with data - """ - - SIMPLE_TAGS = [ - "attorneys", - "caption", - "citation", - "court", - "date", - "docket", - "hearing_date", - "panel", - "posture", - "reporter_caption", - ] - - data = {} # type: dict - - if "/home/mlissner/columbia/opinions/" in str(xml_filepath): - filepath = str(xml_filepath).replace( - "/home/mlissner/columbia/opinions/", "" - ) - # fix file path temporarily - new_xml_filepath = os.path.join(xml_dir, filepath) - else: - logger.info(f"Can't fix xml file path: {xml_filepath}") - raise FileNotFoundError - - with open(new_xml_filepath, "r", encoding="utf-8") as f: - file_content = f.read() + content = None - data["unpublished"] = False + # We can only use columbia's content to infer the ordering + if len(op.html_columbia) > 1: + content = op.html_columbia - if "" in file_content: - file_content = file_content.replace( - "", "" - ) - file_content = file_content.replace("", "").replace( - "", "" + if not content: + raise EmptyOpinionException( + "There is no content in html_columbia field" ) - data["unpublished"] = True + soup = BeautifulSoup(content, features="html.parser") + opinion_text = soup.getText(separator=" ", strip=True) + prep_text = re.sub( + " +", " ", " ".join(opinion_text.split("\n")) + ).strip() + prep_text = re.sub(r"[^a-zA-Z0-9 ]", "", prep_text.lower()) + + cl_cleaned_opinions.append( + { + "id": op.id, + "byline": op.author_str, + "type": op.type, + "opinion": prep_text, + "order": i, + } + ) - # Sometimes opening and ending tag mismatch (e.g. 
c6b39dcb29c9c.xml) - file_content = file_content.replace( - "", "" + return ( + xml_path, + cl_cleaned_opinions, + start_position, + cluster_has_combined_opinion, ) - soup = BeautifulSoup(file_content, "lxml") - - # Find the outer tag to have all elements inside - find_opinion = soup.find("opinion") - - step_one_opinions = [] # type: list - opinions = [] # type: list - order = 0 - - if find_opinion: - untagged_content = [] - # We iterate all content, with and without tags - # STEP 1: Extract all content in multiple dict elements - for i, content in enumerate(find_opinion): # type: int, Tag - if type(content) == NavigableString: - # We found a raw string, store it - untagged_content.append(str(content)) +def fix_filepath(filepath: str) -> str: + """Fix filepath from file field - else: - if content.name in SIMPLE_TAGS + [ - "citation_line", - "opinion_byline", - "dissent_byline", - "concurrence_byline", - ]: - # Ignore these tags, it will be processed later - continue - elif content.name in [ - "opinion_text", - "dissent_text", - "concurrence_text", - ]: - if untagged_content: - # We found something other than a navigable string that is - # not an opinion, but now we have found an opinion, - # let's create this content first - - # default type - op_type = "opinion" - if step_one_opinions: - if step_one_opinions[-1].get("type"): - # use type of previous opinion if exists - op_type = step_one_opinions[-1].get("type") - - # Get rid of double spaces - opinion_content = re.sub( - " +", " ", "\n".join(untagged_content) - ).strip() # type: str - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": "", - "type": op_type, - } - ) - order = order + 1 - untagged_content = [] - - byline = content.find_previous_sibling() - opinion_author = "" - if byline and "_byline" in byline.name: - opinion_author = byline.get_text() - - opinion_content = re.sub( - " +", " ", content.decode_contents() - ).strip() - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": opinion_author, - "type": content.name.replace("_text", ""), - } - ) - order = order + 1 - - else: - # Content not inside _text tag, we store it - untagged_content.append(str(content)) - - if untagged_content: - # default type - op_type = "opinion" - if step_one_opinions: - if step_one_opinions[-1].get("type"): - # use type of previous opinion if exists - op_type = step_one_opinions[-1].get("type") - - opinion_content = re.sub( - " +", " ", "\n".join(untagged_content) - ).strip() - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": "", - "type": op_type, - } - ) + :param filepath: path from file field + :return: new file path + """ + if "/home/mlissner/columbia/opinions/" in filepath: + filepath = filepath.replace("/home/mlissner/columbia/opinions/", "") + return filepath - # Step 2: Merge found content in the xml file - new_order = 0 - authorless_content = [] - - for i, found_content in enumerate(step_one_opinions, start=1): - byline = found_content.get("byline") - if not byline: - # Opinion has no byline, store it - authorless_content.append(found_content) - - if byline: - # Opinion has byline - opinion_type = found_content.get("type") - opinion_content = found_content.get("opinion", "") - # Store content that doesn't match the current type - alternative_authorless_content = [ - z - for z in authorless_content - if z.get("type") != opinion_type - ] - # Keep content that matches 
the current type - authorless_content = [ - z - for z in authorless_content - if z.get("type") == opinion_type - ] - - if alternative_authorless_content: - # Keep floating text that are not from the same type, - # we need to create a separate opinion for those, - # for example: in 2713f39c5a8e8684.xml we have an opinion - # without an author, and the next opinion with an author is - # a dissent opinion, we can't combine both - - # We check if the previous stored opinion matches the type of the - # content - relevant_opinions = ( - [opinions[-1]] - if opinions - and opinions[-1]["type"] - == alternative_authorless_content[0].get("type") - else [] - ) - if relevant_opinions: - previous_opinion = relevant_opinions[-1] - if previous_opinion.get( - "type" - ) == alternative_authorless_content[0].get("type"): - # Merge last opinion with previous opinion, it probably - # belongs the same author - relevant_opinions[-1][ - "opinion" - ] += "\n" + "\n".join( - [ - f.get("opinion") - for f in alternative_authorless_content - if f.get("opinion") - ] - ) - authorless_content = [] +def get_opinions_columbia_file(xml_filepath: str) -> list: + """Get opinions from columbia xml file and convert it into dict - else: - # No relevant opinions found, create a new opinion - new_opinion = { - "byline": None, - "type": alternative_authorless_content[0].get( - "type" - ), - "opinion": "\n".join( - [ - f.get("opinion") - for f in alternative_authorless_content - if f.get("opinion") - ] - ), - "order": new_order, - } - new_order = new_order + 1 - opinions.append(new_opinion) - - # Add new opinion - new_opinion = { - "byline": byline, - "type": opinion_type, - "opinion": "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("type") == opinion_type - ] - ) - + "\n\n" - + opinion_content, - "order": new_order, - } - - opinions.append(new_opinion) - new_order = new_order + 1 - authorless_content = [] - - if len(step_one_opinions) == i and authorless_content: - # If is the last opinion, and we still have opinions without - # byline, create an opinion without an author and the contents - # that couldn't be merged - - # We check if the previous stored opinion matches the type of the - # content - relevant_opinions = ( - [opinions[-1]] - if opinions - and opinions[-1]["type"] - == authorless_content[0].get("type") - else [] - ) + :param xml_filepath: path of xml file + :return: dict with data + """ + soup = read_xml_to_soup(xml_filepath) - if relevant_opinions: - previous_opinion = relevant_opinions[-1] - if previous_opinion.get("type") == authorless_content[ - 0 - ].get("type"): - # Merge last opinion with previous opinion, it probably - # belongs the same author - relevant_opinions[-1]["opinion"] += "\n" + "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("opinion") - ] - ) + # Find the outer tag to have all elements inside + outer_opinion = soup.find("opinion") - else: - # Create last floating opinion - new_opinion = { - "byline": None, - "type": authorless_content[0].get("type"), - "opinion": "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("opinion") - ] - ), - "order": new_order, - } - opinions.append(new_opinion) + extracted_opinions = extract_columbia_opinions(outer_opinion) + opinions = process_extracted_opinions(extracted_opinions) + map_opinion_types(opinions) for op in opinions: opinion_content = op.get("opinion") - opinion_content = BeautifulSoup( - opinion_content, "html.parser" - ).getText() - opinion_content = re.sub(r"[^a-zA-Z0-9 ]", 
"", opinion_content.lower()) - op["opinion"] = opinion_content + soup = BeautifulSoup(opinion_content, "html.parser") + opinion_text = soup.getText(separator=" ", strip=True) + opinion_text = re.sub( + " +", " ", " ".join(opinion_text.split("\n")) + ).strip() + cleaned_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_text.lower()) + op["opinion"] = cleaned_opinion return opinions -def run_harvard(start_id: int, end_id: int): - """ - We assume that harvard data is already ordered, we just need to fill the order +def sort_harvard_opinions(start_id: int, end_id: int) -> None: + """We assume that harvard data is already ordered, we just need to fill the order field in each opinion + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value + :return: None """ # Get all harvard clusters with more than one opinion clusters = ( OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source="U") + .filter(opinions_count__gt=1, source__in=VALID_HARVARD_SOURCES) .order_by("id") ) @@ -478,18 +545,19 @@ def run_harvard(start_id: int, end_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int, end_id: int, xml_dir: str): - """ - Update opinion order for columbia clusters +def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: + """Update opinion ordering for columbia clusters + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value :param xml_dir: absolute path to the directory with columbia xml files + :return: None """ # Get all columbia cluster ids with more than one opinion clusters = ( OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source="Z") + .filter(opinions_count__gt=1, source__in=VALID_COLUMBIA_SOURCES) .order_by("id") .values_list("id", flat=True) ) @@ -502,37 +570,53 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") - ( - xml_path, - cl_cleaned_opinions, - start_position, - combined_opinion, - ) = get_opinion_content(cluster_id) - - columbia_opinions = None + + try: + ( + xml_path, + cl_cleaned_opinions, + start_position, + cluster_has_combined_opinion, + ) = get_opinions_cleaned_content(cluster_id) + except EmptyOpinionException: + logger.warning( + f"At least one of the opinions from cluster id: {cluster_id} is empty." 
+ ) + continue + + extracted_columbia_opinions = None if xml_path: - try: - columbia_opinions = get_opinions_columbia_xml( - xml_path, xml_dir - ) - except FileNotFoundError: + fixed_xml_filepath = os.path.join(xml_dir, fix_filepath(xml_path)) + + if not os.path.exists(fixed_xml_filepath): logger.warning( - f"Xml file not found in {xml_path}, cluster id: {cluster_id}" + f"Xml file not found in {fixed_xml_filepath}, cluster id: {cluster_id}" + ) + continue + + try: + extracted_columbia_opinions = get_opinions_columbia_file( + fixed_xml_filepath ) + except UnicodeDecodeError: + logger.warning(f"Cannot decode file: {fixed_xml_filepath}") continue - if cl_cleaned_opinions and columbia_opinions: + if cl_cleaned_opinions and extracted_columbia_opinions: + columbia_opinions_content = [ + op.get("opinion") + for op in extracted_columbia_opinions + if op.get("opinion") + ] + cl_opinions_content = [ + op.get("opinion") + for op in cl_cleaned_opinions + if op.get("opinion") + ] + matches = match_text_lists( - [ - op.get("opinion") - for op in columbia_opinions - if op.get("opinion") - ], - [ - op.get("opinion") - for op in cl_cleaned_opinions - if op.get("opinion") - ], + columbia_opinions_content, + cl_opinions_content, ) if matches: @@ -557,7 +641,7 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): failed = False for file_pos, cl_pos in matches.items(): # file_pos is the correct index to find the opinion id to update - file_opinion = columbia_opinions[file_pos] + file_opinion = extracted_columbia_opinions[file_pos] # the order was calculated using the xml file file_order = file_opinion.get("order") + start_position cl_opinion = cl_cleaned_opinions[cl_pos] @@ -587,7 +671,7 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): failed = True break - if combined_opinion and not failed: + if cluster_has_combined_opinion and not failed: combined_opinions_cluster = Opinion.objects.filter( cluster_id=cluster_id, type="010combined" ).order_by("id") @@ -628,6 +712,7 @@ def add_arguments(self, parser): parser.add_argument( "--xml-dir", + default="/opt/courtlistener/_columbia", required=False, help="The absolute path to the directory with columbia xml files", ) @@ -653,11 +738,15 @@ def handle(self, *args, **options): ) return + if not options["process_harvard"] and not options["process_columbia"]: + print("One option required: process-harvard or process-columbia") + return + if options["process_harvard"]: - run_harvard(options["start_id"], options["end_id"]) + sort_harvard_opinions(options["start_id"], options["end_id"]) if options["process_columbia"] and options["xml_dir"]: - run_columbia( + sort_columbia_opinions( options["start_id"], options["end_id"], options["xml_dir"] ) From f808b95b68487580b3d24be400afee91dcd4f938 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 30 Nov 2023 11:43:58 -0600 Subject: [PATCH 029/372] fix(opinions_order): code refactored NOTE: functions found in columbia_utils.py and utils.py, were temporarily added in the command,when the necessary changes are combined we need to remove the functions and import them from the utils. 
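To make the new matching contract concrete, here is a minimal usage sketch (the strings and the resulting pairs are invented for illustration):

    # Hypothetical usage of match_opinion_lists() as introduced below.
    file_ops = ["the opinion of the court was delivered by smith ...",
                "jones, dissenting ..."]
    cl_ops = ["jones, dissenting ...",
              "the opinion of the court was delivered by smith ..."]
    matches = match_opinion_lists(file_ops, cl_ops)
    # e.g. {0: 1, 1: 0}: file opinion 0 pairs with cl opinion 1, and so on.
    # For each file opinion, the best cl candidate (argmax of the similarity
    # scores) is kept unless cosine_sim < 0.60 and percent_match < 60.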
---
 .../commands/update_opinions_order.py         | 231 +++++++++++-------
 1 file changed, 147 insertions(+), 84 deletions(-)

diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py
index ae931ba4b7..5b86c98130 100644
--- a/cl/corpus_importer/management/commands/update_opinions_order.py
+++ b/cl/corpus_importer/management/commands/update_opinions_order.py
@@ -1,12 +1,13 @@
 import os.path
 import re
-from typing import Any, List, Optional
+from typing import Any, Optional

 from bs4 import BeautifulSoup, NavigableString, Tag
 from django.core.management import BaseCommand
+from django.db import transaction
 from django.db.models import Count

-from cl.corpus_importer.utils import similarity_scores
+from cl.corpus_importer.utils import compare_documents, similarity_scores
 from cl.lib.command_utils import logger
 from cl.lib.string_diff import get_cosine_similarity
 from cl.search.models import SOURCES, Opinion, OpinionCluster
@@ -24,7 +25,6 @@

 # TODO remove the functions below and import them from utils.py and columbia_utils.py when those changes get merged

-
 SIMPLE_TAGS = [
     "attorneys",
     "caption",
@@ -347,47 +347,86 @@ def map_opinion_types(opinions=None) -> None:
             op["type"] = "030concurrence"


-# TODO ------------------------ remove until here -------------------------------
-
-
-def match_text_lists(
-    file_opinions_list: List[Any], cl_opinions_list: List[Any]
+def match_opinion_lists(
+    file_opinions_list: list[Any], cl_opinions_list: list[Any]
 ) -> dict[int, int]:
-    """Generate matching lists above threshold
+    """Try to match the opinions in two lists and generate a dict with the
+    positions of the matching opinions
+
+    Non-alphanumeric and non-whitespace characters are removed from the
+    lowercased texts; this normalizes both texts so they can be compared on
+    equal terms
+
+    get_cosine_similarity works great when both texts are almost the same,
+    with only very small variations
+
+    Cosine similarity sometimes fails when small variations in the text, such
+    as parties, attorneys, case name, or court, are included in the content of
+    the opinion. compare_documents() checks the percentage of the file opinion
+    text that is contained in the courtlistener opinion; a large percentage
+    means that almost all of the file opinion is in the courtlistener opinion,
+    although the courtlistener opinion may contain some additional data in the
+    opinion content (such as case name, parties, etc.)
+
+    compare_documents works well when the opinion from the file is a subset of
+    the opinion in CL; the percentage represents how much of the file opinion
+    is in the opinion from cl (content in the cl opinion can have other data
+    in the body like posture, attorneys, etc.; e.g. in cluster id: 7643871 we
+    have the posture and the opinion text, but in the xml file we only have
+    the opinion text, cosine_sim: 0.1639075094124459 and percent_match: 73)
+
+    Sometimes one algorithm performs better than the other; this is due to
+    additional text, such as editor's notes, the author, a page number, or the
+    posture added to the opinion
+
+    The key is the opinion position from the file, the value is the opinion
+    position from the cl opinion, e.g. matches {0: 1, 1: 2} means file
+    opinion 0 matches cl opinion 1, and file opinion 1 matches cl opinion 2

     :param file_opinions_list: Opinions from file
     :param cl_opinions_list: CL opinions
     :return: Matches if found or empty dict
     """
-    # We import this here to avoid a circular import
-    from cl.corpus_importer.management.commands.harvard_opinions import (
-        compare_documents,
-    )

     scores = similarity_scores(file_opinions_list, cl_opinions_list)

     matches = {}
     for i, row in enumerate(scores):
         j = row.argmax()  # type: ignore
-        # Lower threshold for small opinions.
-        if (
-            get_cosine_similarity(file_opinions_list[i], cl_opinions_list[j])
-            < 0.60
-        ):
-            continue

-        percent_match = compare_documents(
-            file_opinions_list[i], cl_opinions_list[j]
+        file_opinion = re.sub(
+            r"[^a-zA-Z0-9 ]", "", file_opinions_list[i].lower()
         )
-        if percent_match < 60:
+        cl_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", cl_opinions_list[j].lower())
+
+        cosine_sim = get_cosine_similarity(file_opinion, cl_opinion)
+
+        percent_match = compare_documents(file_opinion, cl_opinion)
+
+        if cosine_sim < 0.60 and percent_match < 60:
             continue
+
         matches[i] = j

-    # Key is opinion position from file, Value is opinion position from cl opinion
-    # e.g. matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file
-    # opinion and 2 is cl opinion
     return matches


+def clean_opinion_content(text: str) -> str:
+    """Clean opinion content
+
+    :param text: text to clean
+    :return: cleaned text
+    """
+
+    # Replace line breaks with spaces and get rid of double spaces
+    text = re.sub(" +", " ", " ".join(text.split("\n"))).strip()
+
+    # Remove non-alphanumeric and non-whitespace characters from lowercased text
+    return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower())
+
+
+# TODO ------------------------ remove until here -------------------------------
+
+
 def get_opinions_cleaned_content(
     cluster_id,
 ) -> tuple[Optional[str], list[dict], int, bool]:
@@ -432,10 +471,7 @@ def get_opinions_cleaned_content(

         soup = BeautifulSoup(content, features="html.parser")
         opinion_text = soup.getText(separator=" ", strip=True)
-        prep_text = re.sub(
-            " +", " ", " ".join(opinion_text.split("\n"))
-        ).strip()
-        prep_text = re.sub(r"[^a-zA-Z0-9 ]", "", prep_text.lower())
+        prep_text = clean_opinion_content(opinion_text)

         cl_cleaned_opinions.append(
             {
@@ -485,10 +521,7 @@ def get_opinions_columbia_file(xml_filepath: str) -> list:
         opinion_content = op.get("opinion")
         soup = BeautifulSoup(opinion_content, "html.parser")
         opinion_text = soup.getText(separator=" ", strip=True)
-        opinion_text = re.sub(
-            " +", " ", " ".join(opinion_text.split("\n"))
-        ).strip()
-        cleaned_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_text.lower())
+        cleaned_opinion = clean_opinion_content(opinion_text)
         op["opinion"] = cleaned_opinion
     return opinions

@@ -545,6 +578,78 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None:
         logger.info(msg=f"Opinions reordered for cluster id: {oc.id}")


+def update_opinions(
+    cluster_id: int,
+    cl_opinions: list,
+    columbia_opinions: list,
+    matches: dict,
+    cluster_has_combined_opinion: bool,
+    start_position: int,
+):
+    """Update opinions with correct order
+
+    :param cluster_id: the id of the cluster whose opinions are updated
+    :param cl_opinions: a list with cleaned opinions from cl
+    :param columbia_opinions: an ordered list with cleaned opinions from the xml file
+    :param matches: a dict with the matches between the opinions of both lists
+    :param cluster_has_combined_opinion: True if the cluster has combined opinions
+    :param start_position: the number from where the order should begin for
+    non-combined
opinions + :return: None + """ + update_failed = False + + with transaction.atomic(): + for file_pos, cl_pos in matches.items(): + # file_pos is the correct index to find the opinion id to update + file_opinion = columbia_opinions[file_pos] + # the order was calculated using the xml file + file_order = file_opinion.get("order") + start_position + cl_opinion = cl_opinions[cl_pos] + opinion_id_to_update = cl_opinion.get("id") + + if opinion_id_to_update: + try: + # Update opinion order + op = Opinion.objects.get(id=opinion_id_to_update) + op.order = file_order + op.save() + except Opinion.DoesNotExist: + # This should not happen, but it is better to be + # cautious + logger.warning( + f"We can't update opinion, opinion doesn't exist " + f"with id: {opinion_id_to_update}" + ) + update_failed = True + break + + if cluster_has_combined_opinion and not update_failed: + combined_opinions_cluster = Opinion.objects.filter( + cluster_id=cluster_id, type="010combined" + ).order_by("id") + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate( + combined_opinions_cluster + ): + cluster_op.order = opinion_order + cluster_op.save() + + if update_failed: + # There was an error updating an opinion, rollback all changes for + # cluster's opinions + logger.warning( + f"There was an error updating the order of opinions of the " + f"cluster id: {cluster_id}" + ) + transaction.set_rollback(True) + else: + logger.info( + f"The order of opinions was updated, cluster id: {cluster_id}" + ) + + def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: """Update opinion ordering for columbia clusters @@ -614,7 +719,7 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: if op.get("opinion") ] - matches = match_text_lists( + matches = match_opinion_lists( columbia_opinions_content, cl_opinions_content, ) @@ -638,57 +743,15 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: # Go to next cluster id continue - failed = False - for file_pos, cl_pos in matches.items(): - # file_pos is the correct index to find the opinion id to update - file_opinion = extracted_columbia_opinions[file_pos] - # the order was calculated using the xml file - file_order = file_opinion.get("order") + start_position - cl_opinion = cl_cleaned_opinions[cl_pos] - opinion_id_to_update = cl_opinion.get("id") - - if opinion_id_to_update: - try: - # Save opinion - op = Opinion.objects.get(id=opinion_id_to_update) - op.order = file_order - op.save() - logger.info( - f"Cluster id processed: {cluster_id} Update opinion id: {opinion_id_to_update} with position: {file_order}" - ) - except Opinion.DoesNotExist: - logger.warning( - f"We can't update opinion, opinion doesn't exist with " - f"id: {opinion_id_to_update}" - ) - failed = True - break - else: - logger.warning( - f"We can't update opinion, empty opinion id " - f"from cluster: {cluster_id}" - ) - failed = True - break - - if cluster_has_combined_opinion and not failed: - combined_opinions_cluster = Opinion.objects.filter( - cluster_id=cluster_id, type="010combined" - ).order_by("id") - - # Show combined opinions at beginning - for opinion_order, cluster_op in enumerate( - combined_opinions_cluster - ): - cluster_op.order = opinion_order - cluster_op.save() - - else: - # No matches found - logger.warning( - f"Failed to match opinions from cluster id: {cluster_id}" + # Update all opinions order + update_opinions( + cluster_id, + cl_cleaned_opinions, + extracted_columbia_opinions, + matches, + 
cluster_has_combined_opinion, + start_position, ) - continue class Command(BaseCommand): From f928aa021fe9de812f9e82b64a044582b5ffda78 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 19 Feb 2024 18:13:46 -0600 Subject: [PATCH 030/372] fix(opinion_order): update poetry.lock and pyproject.toml --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 24a1c45791..25db969843 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1062,6 +1062,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5105,4 +5116,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "d0cb9ebf26ba111318df8c00976f71ad6b18ffc1aafab1df3b506bfe5128611d" +content-hash = "a8dfd3edc2209cb2d357696b751508ebd0c249be0b1b408f2f7225884a5e7b2a" diff --git a/pyproject.toml b/pyproject.toml index 32afda8f5f..e8d88a61f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ httpx = {extras = ["http2"], version = "^0.26.0"} django-model-utils = "^4.3.1" juriscraper = "*" django-permissions-policy = "^4.19.0" +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From d46b42fd39b6abacf301ae3ce46ed090d5cb5446 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 19 Feb 2024 18:21:48 -0600 Subject: [PATCH 031/372] fix(opinion_order): rename migrations --- .../{0024_order_opinions.py => 0027_order_opinions.py} | 2 +- .../{0024_order_opinions.sql => 0027_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0024_order_opinions.py => 0027_order_opinions.py} (98%) rename cl/search/migrations/{0024_order_opinions.sql => 0027_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0024_order_opinions.py b/cl/search/migrations/0027_order_opinions.py similarity index 98% rename from cl/search/migrations/0024_order_opinions.py rename to cl/search/migrations/0027_order_opinions.py index 1abaed4d76..e1c602e2e5 100644 --- a/cl/search/migrations/0024_order_opinions.py +++ b/cl/search/migrations/0027_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0023_add_docket_sources_noop"), + ("search", "0026_drop_docket_unique_together_and_more"), ] operations = [ diff --git a/cl/search/migrations/0024_order_opinions.sql b/cl/search/migrations/0027_order_opinions.sql similarity index 100% rename from cl/search/migrations/0024_order_opinions.sql rename to cl/search/migrations/0027_order_opinions.sql From cefb8482ed586e65526f59818901eca56ca26e7d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 6 May 2024 18:12:54 -0600 Subject: [PATCH 032/372] feat(opinion_order): resolve merge conflict, rename migrations --- ..._order_opinions.py => 0031_order_opinions.py} | 2 +- ...rder_opinions.sql => 0031_order_opinions.sql} | 0 poetry.lock | 16 +++++++++++++--- 3 files changed, 14 
insertions(+), 4 deletions(-) rename cl/search/migrations/{0027_order_opinions.py => 0031_order_opinions.py} (98%) rename cl/search/migrations/{0027_order_opinions.sql => 0031_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0027_order_opinions.py b/cl/search/migrations/0031_order_opinions.py similarity index 98% rename from cl/search/migrations/0027_order_opinions.py rename to cl/search/migrations/0031_order_opinions.py index e1c602e2e5..9e7774203d 100644 --- a/cl/search/migrations/0027_order_opinions.py +++ b/cl/search/migrations/0031_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0026_drop_docket_unique_together_and_more"), + ("search", "0030_recapdocument_pacer_doc_id_idx"), ] operations = [ diff --git a/cl/search/migrations/0027_order_opinions.sql b/cl/search/migrations/0031_order_opinions.sql similarity index 100% rename from cl/search/migrations/0027_order_opinions.sql rename to cl/search/migrations/0031_order_opinions.sql diff --git a/poetry.lock b/poetry.lock index f22583b490..109cadc2d3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1062,6 +1062,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2467,7 +2478,6 @@ files = [ {file = "lxml-5.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9e2addd2d1866fe112bc6f80117bcc6bc25191c5ed1bfbcf9f1386a884252ae8"}, {file = "lxml-5.2.1-cp37-cp37m-win32.whl", hash = "sha256:f51969bac61441fd31f028d7b3b45962f3ecebf691a510495e5d2cd8c8092dbd"}, {file = "lxml-5.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c"}, - {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3e183c6e3298a2ed5af9d7a356ea823bccaab4ec2349dc9ed83999fd289d14d5"}, {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:804f74efe22b6a227306dd890eecc4f8c59ff25ca35f1f14e7482bbce96ef10b"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08802f0c56ed150cc6885ae0788a321b73505d2263ee56dad84d200cab11c07a"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f8c09ed18ecb4ebf23e02b8e7a22a05d6411911e6fabef3a36e4f371f4f2585"}, @@ -5259,4 +5269,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "994213014ffbb4387604c85fddd76e01112f4e3b66a1be6bc77f601b5b1de1b8" +content-hash = "c6a4dd1a9c6ecf961e254a3d6d0387f4d5e6f6fdb4181c33e2c55174e68d4454" From 0b93a8c6f1b41158ddafc04ed593ce3a58686c24 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Tue, 14 May 2024 19:27:47 -0500 Subject: [PATCH 033/372] 
feat(cl_scrape_opinions): ingest more Juriscraper fields Partially solves #4042 Ingest "lower_courts" into Docket.appeal_from_str Ingest "dispositions" into OpinionCluster.disposition Ingest "authors" into Opinion.author_str Ingest "joined_by" into Opinion.joined_by Ingest "per_curiam" into Opinion.per_curiam Ingest "types" into Opinion.type Last 4 fields are not supported in Juriscraper as of yet, but the changes proposed keep the default behavior --- cl/scrapers/management/commands/cl_scrape_opinions.py | 7 ++++++- cl/scrapers/utils.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cl/scrapers/management/commands/cl_scrape_opinions.py b/cl/scrapers/management/commands/cl_scrape_opinions.py index 1ea37385b8..d3387a7b72 100644 --- a/cl/scrapers/management/commands/cl_scrape_opinions.py +++ b/cl/scrapers/management/commands/cl_scrape_opinions.py @@ -103,6 +103,7 @@ def make_objects( item.get("source") or Docket.SCRAPER, blocked=blocked, date_blocked=date_blocked, + appeal_from_str=item.get("lower_courts", ""), ) cluster = OpinionCluster( @@ -117,6 +118,7 @@ def make_objects( blocked=blocked, date_blocked=date_blocked, syllabus=item.get("summaries", ""), + disposition=item.get("dispositions", ""), ) cites = [item.get(key, "") for key in ["citations", "parallel_citations"]] @@ -131,9 +133,12 @@ def make_objects( url = "" opinion = Opinion( - type=Opinion.COMBINED, + type=item.get("types", Opinion.COMBINED), sha1=sha1_hash, download_url=url, + author_str=item.get("authors", ""), + joined_by_str=item.get("joined_by", ""), + per_curiam=item.get("per_curiam", False), ) cf = ContentFile(content) diff --git a/cl/scrapers/utils.py b/cl/scrapers/utils.py index bb7d47ebae..d75bada36f 100644 --- a/cl/scrapers/utils.py +++ b/cl/scrapers/utils.py @@ -295,6 +295,7 @@ def update_or_create_docket( date_blocked: date | None = None, date_argued: date | None = None, ia_needs_upload: bool | None = None, + appeal_from_str: str = "", ) -> Docket: """Look for an existing Docket and update it or create a new one if it's not found. @@ -309,6 +310,7 @@ def update_or_create_docket( :param date_blocked: The docket date_blocked if it's blocked. :param date_argued: The docket date_argued if it's an oral argument. :param ia_needs_upload: If the docket needs upload to IA, default None. + :param appeal_from_str: Name (not standardized id) of the lower level court. :return: The docket. """ @@ -320,6 +322,7 @@ def update_or_create_docket( "date_blocked": date_blocked, "date_argued": date_argued, "ia_needs_upload": ia_needs_upload, + "appeal_from_str": appeal_from_str, } docket = async_to_sync(find_docket_object)(court_id, None, docket_number) From d1a1708f363764056e4c6f9e0159e460675ad3da Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 5 Jun 2024 12:58:05 -0600 Subject: [PATCH 034/372] fix(opinion_order): update poetry.lock to solve merge conflicts --- poetry.lock | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 6d7f85852a..cbc5ec2cc3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1058,6 +1058,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5254,4 +5265,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "814ca0b0dc8db689f83e391fc58b494de48f6321085872bfaa8e37b7a7fc0e99" +content-hash = "a64d61d094d3896cb204e882ff2471b4f3b69def7416a2b50cdcedc9acf6455e" From 754d71fda6d7a12d4dfc6dddf121399d6d0582c9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 5 Jun 2024 13:06:55 -0600 Subject: [PATCH 035/372] fix(opinion_order): rename migration --- .../{0031_order_opinions.py => 0032_order_opinions.py} | 2 +- .../{0031_order_opinions.sql => 0032_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0031_order_opinions.py => 0032_order_opinions.py} (98%) rename cl/search/migrations/{0031_order_opinions.sql => 0032_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0031_order_opinions.py b/cl/search/migrations/0032_order_opinions.py similarity index 98% rename from cl/search/migrations/0031_order_opinions.py rename to cl/search/migrations/0032_order_opinions.py index 9e7774203d..b34bb01d48 100644 --- a/cl/search/migrations/0031_order_opinions.py +++ b/cl/search/migrations/0032_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0030_recapdocument_pacer_doc_id_idx"), + ("search", "0031_alter_opinion_type_alter_opinioncluster_source_noop"), ] operations = [ diff --git a/cl/search/migrations/0031_order_opinions.sql b/cl/search/migrations/0032_order_opinions.sql similarity index 100% rename from cl/search/migrations/0031_order_opinions.sql rename to cl/search/migrations/0032_order_opinions.sql From 06c814d2b4bb3624203da11d371fb4d3eb984772 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 27 Jun 2024 09:49:23 -0600 Subject: [PATCH 036/372] feat(scrape_pacer_free): run scraper by specifying court run scraper for court by specifying start and end date run scraper for court by specifying start and end date and day span --- .../commands/scrape_pacer_free_opinions.py | 258 ++++++++++++------ 1 file changed, 177 insertions(+), 81 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index e4717ec06d..d42d211e0f 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -1,12 +1,13 @@ import argparse +import datetime import os -from datetime import date, timedelta from typing import Callable, Dict, List, Optional, Tuple, cast from celery.canvas import chain from django.conf import settings from django.db.models import QuerySet from django.utils.timezone import now +from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.exceptions import PacerLoginException from juriscraper.lib.string_utils import CaseNameTweaker from requests import RequestException @@ -19,6 +20,7 @@ mark_court_done_on_date, process_free_opinion_result, ) +from cl.lib.argparse_types import valid_date from cl.lib.celery_utils import CeleryThrottle 
from cl.lib.command_utils import VerboseCommand, logger from cl.lib.pacer import map_cl_to_pacer_id, map_pacer_to_cl_id @@ -35,7 +37,7 @@ def get_next_date_range( court_id: str, span: int = 7, -) -> Tuple[Optional[date], Optional[date]]: +) -> Tuple[Optional[datetime.date], Optional[datetime.date]]: """Get the next start and end query dates for a court. Check the DB for the last date for a court that was completed. Return the @@ -64,15 +66,16 @@ def get_next_date_range( # Ensure that we go back five days from the last time we had success if # that success was in the last few days. last_complete_date = min( - now().date() - timedelta(days=5), last_completion_log.date_queried + now().date() - datetime.timedelta(days=5), + last_completion_log.date_queried, ) next_end_date = min( - now().date(), last_complete_date + timedelta(days=span) + now().date(), last_complete_date + datetime.timedelta(days=span) ) return last_complete_date, next_end_date -def mark_court_in_progress(court_id: str, d: date) -> QuerySet: +def mark_court_in_progress(court_id: str, d: datetime.date) -> QuerySet: log = PACERFreeDocumentLog.objects.create( status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, date_queried=d, @@ -81,6 +84,51 @@ def mark_court_in_progress(court_id: str, d: date) -> QuerySet: return log +def fetch_doc_report( + pacer_court_id: int, + start: Optional[datetime.date], + end: Optional[datetime.date], +): + exception_raised = False + status = PACERFreeDocumentLog.SCRAPE_FAILED + + logger.info( + "Attempting to get latest document references for " + "%s between %s and %s", + pacer_court_id, + start, + end, + ) + try: + status = get_and_save_free_document_report(pacer_court_id, start, end) + except ( + RequestException, + ReadTimeoutError, + IndexError, + TypeError, + PacerLoginException, + ValueError, + ) as exc: + if isinstance(exc, (RequestException, ReadTimeoutError)): + reason = "network error." + elif isinstance(exc, IndexError): + reason = "PACER 6.3 bug." + elif isinstance(exc, (TypeError, ValueError)): + reason = "failing PACER website." + elif isinstance(exc, PacerLoginException): + reason = "PACER login issue." + else: + reason = "unknown reason." + logger.error( + "Failed to get free document references for " + f"{pacer_court_id} between {start} and " + f"{end} due to {reason}." + ) + exception_raised = True + + return exception_raised, status + + def get_and_save_free_document_reports(options: OptionsType) -> None: """Query the Free Doc Reports on PACER and get a list of all the free documents. Do not download those items, as that step is done later. For now @@ -95,96 +143,100 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: done. """ # Kill any *old* logs that report they're in progress. (They've failed.) 
- three_hrs_ago = now() - timedelta(hours=3) + three_hrs_ago = now() - datetime.timedelta(hours=3) PACERFreeDocumentLog.objects.filter( date_started__lt=three_hrs_ago, status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, ).update(status=PACERFreeDocumentLog.SCRAPE_FAILED) - cl_court_ids = ( - Court.federal_courts.district_or_bankruptcy_pacer_courts() - .filter( - in_use=True, - end_date=None, + excluded_court_ids = ["casb", "gub", "ilnb", "innb", "miwb", "ohsb", "prb"] + + if options["courts"] != ["all"]: + cl_court_ids = ( + Court.federal_courts.district_or_bankruptcy_pacer_courts() + .filter( + in_use=True, + end_date=None, + pk__in=options["courts"], + ) + .exclude(pk__in=excluded_court_ids) + .values_list("pk", flat=True) ) - .exclude(pk__in=["casb", "gub", "ilnb", "innb", "miwb", "ohsb", "prb"]) - .values_list("pk", flat=True) - ) + else: + cl_court_ids = ( + Court.federal_courts.district_or_bankruptcy_pacer_courts() + .filter( + in_use=True, + end_date=None, + ) + .exclude(pk__in=excluded_court_ids) + .values_list("pk", flat=True) + ) + pacer_court_ids = [map_cl_to_pacer_id(v) for v in cl_court_ids] - today = now() - for pacer_court_id in pacer_court_ids: - while True: - next_start_d, next_end_d = get_next_date_range(pacer_court_id) - if next_end_d is None: - logger.warning( - f"Free opinion scraper for {pacer_court_id} still " - "in progress." - ) - break - logger.info( - "Attempting to get latest document references for " - "%s between %s and %s", - pacer_court_id, - next_start_d, - next_end_d, - ) - mark_court_in_progress(pacer_court_id, next_end_d) - try: - status = get_and_save_free_document_report( - pacer_court_id, next_start_d, next_end_d - ) - except ( - RequestException, - ReadTimeoutError, - IndexError, - TypeError, - PacerLoginException, - ValueError, - ) as exc: - if isinstance(exc, (RequestException, ReadTimeoutError)): - reason = "network error." - elif isinstance(exc, IndexError): - reason = "PACER 6.3 bug." - elif isinstance(exc, (TypeError, ValueError)): - reason = "failing PACER website." - elif isinstance(exc, PacerLoginException): - reason = "PACER login issue." - else: - reason = "unknown reason." - logger.error( - "Failed to get free document references for " - f"{pacer_court_id} between {next_start_d} and " - f"{next_end_d} due to {reason}." + if options["date_start"] and options["date_end"]: + date_ranges = make_date_range_tuples( + options["date_start"], options["date_end"], gap=options["span"] + ) + for pacer_court_id in pacer_court_ids: + for start, end in date_ranges: + exception_raised, status = fetch_doc_report( + pacer_court_id, start, end ) - mark_court_done_on_date( - PACERFreeDocumentLog.SCRAPE_FAILED, - pacer_court_id, - next_end_d, + if exception_raised: + break + + else: + today = now() + for pacer_court_id in pacer_court_ids: + while True: + next_start_d, next_end_d = get_next_date_range(pacer_court_id) + print( + f"next_start_d: {next_start_d} - next_end_d: {next_end_d}" ) - break + if next_end_d is None: + logger.warning( + f"Free opinion scraper for {pacer_court_id} still " + "in progress." 
+ ) + break + + mark_court_in_progress(pacer_court_id, next_end_d) - mark_court_done_on_date(status, pacer_court_id, next_end_d) + exc, status = fetch_doc_report( + pacer_court_id, next_start_d, next_end_d + ) + if exc: + mark_court_done_on_date( + PACERFreeDocumentLog.SCRAPE_FAILED, + pacer_court_id, + next_end_d, + ) + break - if status == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL: - if next_end_d >= today.date(): - logger.info( - "Got all document references for '%s'.", pacer_court_id + mark_court_done_on_date(status, pacer_court_id, next_end_d) + + if status == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL: + if next_end_d >= today.date(): + logger.info( + "Got all document references for '%s'.", + pacer_court_id, + ) + # Break from while loop, onwards to next court + break + else: + # More dates to do; let it continue + continue + + elif status == PACERFreeDocumentLog.SCRAPE_FAILED: + logger.error( + "Encountered critical error on %s " + "(network error?). Marking as failed and " + "pressing on." % pacer_court_id ) # Break from while loop, onwards to next court break - else: - # More dates to do; let it continue - continue - - elif status == PACERFreeDocumentLog.SCRAPE_FAILED: - logger.error( - "Encountered critical error on %s " - "(network error?). Marking as failed and " - "pressing on." % pacer_court_id - ) - # Break from while loop, onwards to next court - break def get_pdfs(options: OptionsType) -> None: @@ -202,7 +254,18 @@ def get_pdfs(options: OptionsType) -> None: q = cast(str, options["queue"]) index = options["index"] cnt = CaseNameTweaker() - rows = PACERFreeDocumentRow.objects.filter(error_msg="").only("pk") + rows = PACERFreeDocumentRow.objects.filter(error_msg="") + + if options["courts"] != ["all"]: + rows = rows.filter(court_id__in=options["courts"]) + + if options["date_start"] and options["date_end"]: + rows = rows.filter( + date_filed__gte=options["date_start"], + date_filed__lte=options["date_end"], + ) + + rows = rows.only("pk") count = rows.count() task_name = "downloading" if index: @@ -297,9 +360,42 @@ def add_arguments(self, parser: argparse.ArgumentParser) -> None: default=False, help="Do we index as we go, or leave that to be done later?", ) + parser.add_argument( + "--courts", + type=str, + default=["all"], + nargs="*", + help="The courts that you wish to parse.", + ) + parser.add_argument( + "--date-start", + dest="date_start", + required=False, + type=valid_date, + help="Date when the query should start.", + ) + parser.add_argument( + "--date-end", + dest="date_end", + required=False, + type=valid_date, + help="Date when the query should end.", + ) + parser.add_argument( + "--span", + type=int, + default=7, + help="The number of days, inclusive, that a query should span at a time.", + ) def handle(self, *args: List[str], **options: OptionsType) -> None: super().handle(*args, **options) + + if options["date_start"] and options["date_end"]: + if options["date_start"] > options["date_end"]: # type: ignore + print("Error: date-end must be greater than date-start.") + return + action = cast(Callable, options["action"]) action(options) From 51c59b59926b8c4737a2f1045b1385f0b7f451bf Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Tue, 9 Jul 2024 20:25:35 -0500 Subject: [PATCH 037/372] feat(logging): enable juriscraper loggers on console handler Solves #4188 Most juriscraper files use a logger called "Logger", whose specific name must be added to the `loggers` dict for it to be used. 
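A minimal sketch of the name-resolution behavior this relies on (commentary
only, not part of the change; the module path below is illustrative):

    import logging

    # Config entries are matched by the exact name passed to getLogger(),
    # so a module doing logging.getLogger("Logger") is only covered by a
    # literal "Logger" entry in the loggers dict...
    flat = logging.getLogger("Logger")

    # ...while a logger named after its module path propagates up the
    # dotted hierarchy and is covered by the "juriscraper" parent entry.
    scoped = logging.getLogger("juriscraper.opinions.united_states")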
Adding "juriscraper" module level logger in case other loggers are defined --- cl/settings/project/logging.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cl/settings/project/logging.py b/cl/settings/project/logging.py index 226cecbee4..0b1a793246 100644 --- a/cl/settings/project/logging.py +++ b/cl/settings/project/logging.py @@ -78,6 +78,16 @@ def skip_unreadable_post(record): }, # This is the one that's used practically everywhere in the code. "cl": {"handlers": ["console"], "level": "INFO", "propagate": True}, + "juriscraper": { + "handlers": ["console"], + "propagate": True, + "level": "DEBUG", + }, + "Logger": { + "handlers": ["console"], + "propagate": True, + "level": "DEBUG", + }, }, } From 683a79726b47e131a1885d2aa4c94464e8ea13b2 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 10 Jul 2024 15:34:37 -0600 Subject: [PATCH 038/372] feat(scrape_pacer_free_opinions): save pacer html files update task to update court log status --- .../commands/scrape_pacer_free_opinions.py | 84 ++++++++++++------- cl/corpus_importer/tasks.py | 48 +++++++---- cl/recap/models.py | 2 + 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index d42d211e0f..1b40006ab2 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -5,9 +5,7 @@ from celery.canvas import chain from django.conf import settings -from django.db.models import QuerySet from django.utils.timezone import now -from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.exceptions import PacerLoginException from juriscraper.lib.string_utils import CaseNameTweaker from requests import RequestException @@ -75,7 +73,9 @@ def get_next_date_range( return last_complete_date, next_end_date -def mark_court_in_progress(court_id: str, d: datetime.date) -> QuerySet: +def mark_court_in_progress( + court_id: str, d: datetime.date +) -> PACERFreeDocumentLog: log = PACERFreeDocumentLog.objects.create( status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, date_queried=d, @@ -88,9 +88,11 @@ def fetch_doc_report( pacer_court_id: int, start: Optional[datetime.date], end: Optional[datetime.date], + log_id: int = 0, ): exception_raised = False status = PACERFreeDocumentLog.SCRAPE_FAILED + rows_to_create = 0 logger.info( "Attempting to get latest document references for " @@ -100,7 +102,7 @@ def fetch_doc_report( end, ) try: - status = get_and_save_free_document_report(pacer_court_id, start, end) + status, rows_to_create = get_and_save_free_document_report(pacer_court_id, start, end, log_id) # type: ignore except ( RequestException, ReadTimeoutError, @@ -122,10 +124,19 @@ def fetch_doc_report( logger.error( "Failed to get free document references for " f"{pacer_court_id} between {start} and " - f"{end} due to {reason}." 
+ f"{end} due to {reason}.", + exc_info=True, ) exception_raised = True + logger.info( + "Got %s document references for " "%s between %s and %s", + rows_to_create, + pacer_court_id, + start, + end, + ) + return exception_raised, status @@ -176,25 +187,19 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: pacer_court_ids = [map_cl_to_pacer_id(v) for v in cl_court_ids] if options["date_start"] and options["date_end"]: - date_ranges = make_date_range_tuples( - options["date_start"], options["date_end"], gap=options["span"] - ) for pacer_court_id in pacer_court_ids: - for start, end in date_ranges: - exception_raised, status = fetch_doc_report( - pacer_court_id, start, end - ) - if exception_raised: - break - + # Here we do not save the log since if an incorrect range is entered + # the next time the daily cron is executed the command could skip days + exc, status = fetch_doc_report( + pacer_court_id, options["date_start"], options["date_end"] # type: ignore + ) + if exc: + break else: today = now() for pacer_court_id in pacer_court_ids: while True: next_start_d, next_end_d = get_next_date_range(pacer_court_id) - print( - f"next_start_d: {next_start_d} - next_end_d: {next_end_d}" - ) if next_end_d is None: logger.warning( f"Free opinion scraper for {pacer_court_id} still " @@ -202,20 +207,21 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: ) break - mark_court_in_progress(pacer_court_id, next_end_d) + log = mark_court_in_progress(pacer_court_id, next_end_d) exc, status = fetch_doc_report( - pacer_court_id, next_start_d, next_end_d + pacer_court_id, next_start_d, next_end_d, log.pk ) if exc: + # Something failed mark_court_done_on_date( + log.pk, PACERFreeDocumentLog.SCRAPE_FAILED, - pacer_court_id, - next_end_d, ) break - mark_court_done_on_date(status, pacer_court_id, next_end_d) + # Scrape successful + mark_court_done_on_date(log.pk, status) if status == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL: if next_end_d >= today.date(): @@ -233,7 +239,8 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: logger.error( "Encountered critical error on %s " "(network error?). Marking as failed and " - "pressing on." % pacer_court_id + "pressing on." % pacer_court_id, + exc_info=True, ) # Break from while loop, onwards to next court break @@ -319,6 +326,21 @@ def ocr_available(options: OptionsType) -> None: logger.info(f"Sent {i + 1}/{count} tasks to celery so far.") +def do_monthly(): + # Run everything monthly range + pass + + +def do_weekly(): + # Run everything weekly range + pass + + +def do_all(): + # run all courts since first day started to query each court + pass + + def do_everything(options: OptionsType): logger.info("Running and compiling free document reports.") get_and_save_free_document_reports(options) @@ -381,21 +403,16 @@ def add_arguments(self, parser: argparse.ArgumentParser) -> None: type=valid_date, help="Date when the query should end.", ) - parser.add_argument( - "--span", - type=int, - default=7, - help="The number of days, inclusive, that a query should span at a time.", - ) def handle(self, *args: List[str], **options: OptionsType) -> None: super().handle(*args, **options) if options["date_start"] and options["date_end"]: if options["date_start"] > options["date_end"]: # type: ignore - print("Error: date-end must be greater than date-start.") + print( + "Error: date-end must be greater or equal than date-start option." 
+ ) return - action = cast(Callable, options["action"]) action(options) @@ -404,4 +421,7 @@ def handle(self, *args: List[str], **options: OptionsType) -> None: "get-report-results": get_and_save_free_document_reports, "get-pdfs": get_pdfs, "ocr-available": ocr_available, + "do-monthly": do_monthly, + "do-weekly": do_weekly, + "do-all": do_all, } diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 09b2a10526..697050fb99 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -322,17 +322,15 @@ def download_recap_item( soft_time_limit=240, ) def get_and_save_free_document_report( - self: Task, - court_id: str, - start: date, - end: date, -) -> int: + self: Task, court_id: str, start: date, end: date, log_id: int = 0 +) -> Tuple[int, int]: """Download the Free document report and save it to the DB. :param self: The Celery task. :param court_id: A pacer court id. :param start: a date object representing the first day to get results. :param end: a date object representing the last day to get results. + :param log_id: a PACERFreeDocumentLog object id :return: The status code of the scrape """ cookies = get_or_cache_pacer_cookies( @@ -397,6 +395,34 @@ def get_and_save_free_document_report( return PACERFreeDocumentLog.SCRAPE_FAILED raise self.retry(exc=exc, countdown=5) + if log_id: + # We only save the html when the script is run automatically every day + log = PACERFreeDocumentLog.objects.get(pk=log_id) + for result in report.responses: + if isinstance(result, dict): + response = result.get("response") + query_start = result.get("start") + query_end = result.get("end") + + if response and query_start and query_end: + pacer_file = PacerHtmlFiles( + content_object=log, + upload_type=UPLOAD_TYPE.FREE_OPINIONS_REPORT, + ) + pacer_file.filepath.save( + f"free_opinions_report_{court_id}_from_{query_start.replace('/', '-')}_to_{query_end.replace('/', '-')}.html", + ContentFile(response.text.encode()), + ) + else: + # FreeOpinionReport now returns a list of dicts with additional data + # instead of a list of requests responses. + # This is temporary while the new version of juriscraper is added to + # courtlistener + logger.info( + "New version of juriscraper not yet implemented. Can't " + "save PacerHtmlFiles object." 
+ ) + document_rows_to_create = [] for row in results: document_row = PACERFreeDocumentRow( @@ -417,7 +443,7 @@ def get_and_save_free_document_report( # Create PACERFreeDocumentRow in bulk PACERFreeDocumentRow.objects.bulk_create(document_rows_to_create) - return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL + return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL, len(document_rows_to_create) @app.task(bind=True, max_retries=5, ignore_result=True) @@ -866,18 +892,12 @@ def upload_to_ia( @app.task -def mark_court_done_on_date( - status: int, court_id: str, d: date -) -> Optional[int]: - court_id = map_pacer_to_cl_id(court_id) +def mark_court_done_on_date(log_id: int, status: int) -> Optional[int]: try: - doc_log = PACERFreeDocumentLog.objects.filter( - status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, court_id=court_id - ).latest("date_queried") + doc_log = PACERFreeDocumentLog.objects.get(pk=log_id) except PACERFreeDocumentLog.DoesNotExist: return None else: - doc_log.date_queried = d doc_log.status = status doc_log.date_completed = now() doc_log.save() diff --git a/cl/recap/models.py b/cl/recap/models.py index f1183f28a9..6c5ec7055d 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -29,6 +29,7 @@ class UPLOAD_TYPE: CASE_QUERY_RESULT_PAGE = 14 APPELLATE_CASE_QUERY_RESULT_PAGE = 15 ACMS_DOCKET_JSON = 16 + FREE_OPINIONS_REPORT = 17 NAMES = ( (DOCKET, "HTML Docket"), (ATTACHMENT_PAGE, "HTML attachment page"), @@ -46,6 +47,7 @@ class UPLOAD_TYPE: (CASE_QUERY_RESULT_PAGE, "Case query result page"), (APPELLATE_CASE_QUERY_RESULT_PAGE, "Appellate Case query result page"), (ACMS_DOCKET_JSON, "ACMS docket JSON object"), + (FREE_OPINIONS_REPORT, "Free opinions report"), ) From 97ce67d0e03920f8197ccd5f8ffafd5be6f538e7 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 10 Jul 2024 15:47:46 -0600 Subject: [PATCH 039/372] feat(scrape_pacer_free_opinions): migrations for new upload type --- ...4_alter_pacerhtmlfiles_upload_type_noop.py | 64 +++++++++++++++++++ ..._alter_pacerhtmlfiles_upload_type_noop.sql | 10 +++ 2 files changed, 74 insertions(+) create mode 100644 cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py create mode 100644 cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py new file mode 100644 index 0000000000..a6b0c7ac66 --- /dev/null +++ b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py @@ -0,0 +1,64 @@ +# Generated by Django 5.0.6 on 2024-07-10 21:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("recap", "0013_processingqueue_update"), + ] + + operations = [ + migrations.AlterField( + model_name="pacerhtmlfiles", + name="upload_type", + field=models.SmallIntegerField( + choices=[ + (1, "HTML Docket"), + (2, "HTML attachment page"), + (3, "PDF"), + (4, "Docket history report"), + (5, "Appellate HTML docket"), + (6, "Appellate HTML attachment page"), + (7, "Internet Archive XML docket"), + (8, "Case report (iquery.pl) page"), + (9, "Claims register page"), + (10, "Zip archive of RECAP Documents"), + (11, "Email in the SES storage format"), + (12, "Case query page"), + (13, "Appellate Case query page"), + (14, "Case query result page"), + (15, "Appellate Case query result page"), + (16, "ACMS docket JSON object"), + (17, "Free opinions report"), + ], + help_text="The type of object that is uploaded", + ), + ), + migrations.AlterField( + 
model_name="processingqueue", + name="upload_type", + field=models.SmallIntegerField( + choices=[ + (1, "HTML Docket"), + (2, "HTML attachment page"), + (3, "PDF"), + (4, "Docket history report"), + (5, "Appellate HTML docket"), + (6, "Appellate HTML attachment page"), + (7, "Internet Archive XML docket"), + (8, "Case report (iquery.pl) page"), + (9, "Claims register page"), + (10, "Zip archive of RECAP Documents"), + (11, "Email in the SES storage format"), + (12, "Case query page"), + (13, "Appellate Case query page"), + (14, "Case query result page"), + (15, "Appellate Case query result page"), + (16, "ACMS docket JSON object"), + (17, "Free opinions report"), + ], + help_text="The type of object that is uploaded", + ), + ), + ] diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql new file mode 100644 index 0000000000..b0d9d1e378 --- /dev/null +++ b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql @@ -0,0 +1,10 @@ +BEGIN; +-- +-- Alter field upload_type on pacerhtmlfiles +-- +-- (no-op) +-- +-- Alter field upload_type on processingqueue +-- +-- (no-op) +COMMIT; From ce3bba145c5f4e6d97d8dadc5ae1e9640a7973e7 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 11:01:43 -0400 Subject: [PATCH 040/372] feat(setting): Adds new setting for managing a list of proxies --- cl/settings/project/security.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cl/settings/project/security.py b/cl/settings/project/security.py index 57a0ef19f6..a5af603077 100644 --- a/cl/settings/project/security.py +++ b/cl/settings/project/security.py @@ -16,6 +16,7 @@ EGRESS_PROXY_HOST = env( "EGRESS_PROXY_HOST", default="http://cl-webhook-sentry:9090" ) +EGRESS_PROXY_HOSTS: list[str] = env.list("EGRESS_PROXY_HOSTS", default=[]) SECURE_HSTS_SECONDS = 63_072_000 SECURE_HSTS_INCLUDE_SUBDOMAINS = True From f31ca5870cd1858343b7acee3aa63574bc9deca1 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 11:30:31 -0400 Subject: [PATCH 041/372] feat(lib): Adds proxy selection logic for the ProxyPacerSession class This commit updates the `ProxyPacerSession` class to enable selection of a proxy connection string from the application settings. --- cl/lib/pacer_session.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index 7c993556cd..6ff68f9174 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -1,4 +1,5 @@ import pickle +import random from typing import Union from urllib.parse import urlparse @@ -28,14 +29,37 @@ class ProxyPacerSession(PacerSession): """ def __init__( - self, cookies=None, username=None, password=None, client_code=None + self, + cookies=None, + username=None, + password=None, + client_code=None, + proxy=None, ): super().__init__(cookies, username, password, client_code) + self.proxy_address = proxy if proxy else self._pick_proxy_connection() self.proxies = { - "http": settings.EGRESS_PROXY_HOST, + "http": self.proxy_address, } self.headers["X-WhSentry-TLS"] = "true" + def _pick_proxy_connection(self) -> str: + """ + Picks a proxy connection string from available options. + + If the `settings.EGRESS_PROXY_HOSTS` list is empty, this function + returns the value from `settings.EGRESS_PROXY_HOST`. Otherwise, it + randomly chooses a string from the `settings.EGRESS_PROXY_HOSTS` list + and returns it. + + Returns: + str: The chosen proxy connection string. 
+ """ + if not settings.EGRESS_PROXY_HOSTS: + return settings.EGRESS_PROXY_HOST + + return random.choice(settings.EGRESS_PROXY_HOSTS) + def _change_protocol(self, url: str) -> str: """Converts a URL from HTTPS to HTTP protocol. From db2c3ca0b67267b0586f9177431153627ec616ff Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 12:07:45 -0400 Subject: [PATCH 042/372] feat(lib): Tweaks log_into_pacer to return cookie jar and proxy address This commit updates the `log_into_pacer` method to return a tuple containing the user's cookie and the proxy address used for login (if applicable). This improvement provides more context about the login session, facilitating further actions requiring both cookies and potential proxy information. --- cl/lib/pacer_session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index 6ff68f9174..740745f134 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -99,13 +99,13 @@ def log_into_pacer( username: str, password: str, client_code: str | None = None, -) -> RequestsCookieJar: +) -> tuple[RequestsCookieJar, str]: """Log into PACER and return the cookie jar :param username: A PACER username :param password: A PACER password :param client_code: A PACER client_code - :return: Request.CookieJar + :return: A tuple containing the Request.CookieJar and the proxy address """ s = ProxyPacerSession( username=username, @@ -113,7 +113,7 @@ def log_into_pacer( client_code=client_code, ) s.login() - return s.cookies + return s.cookies, s.proxy_address def get_or_cache_pacer_cookies( From a99ac37ecd0858c52a4fc29a4f40278089d86b6f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 12:25:10 -0400 Subject: [PATCH 043/372] feat(lib): Updates get_or_cache_pacer_cookies to return tuples This commit tweaks the `get_or_cache_pacer_cookies` function to handle the new return format of `log_into_pacer`. It ensures compatibility with both the updated function and any existing cached data that might be in the old format. --- cl/lib/pacer_session.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index 740745f134..a7543ad7b4 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -122,7 +122,7 @@ def get_or_cache_pacer_cookies( password: str, client_code: str | None = None, refresh: bool = False, -) -> RequestsCookieJar: +) -> tuple[RequestsCookieJar, str]: """Get PACER cookies for a user or create and cache fresh ones For the PACER Fetch API, we store users' PACER cookies in Redis with a @@ -131,7 +131,7 @@ def get_or_cache_pacer_cookies( This function attempts to get cookies for a user from Redis. If it finds them, it returns them. If not, it attempts to log the user in and then - returns the fresh cookies (after caching them). + returns the fresh cookies and the proxy used to login(after caching them). :param user_pk: The PK of the user attempting to store their credentials. Needed to create the key in Redis. 
@@ -139,21 +139,27 @@ def get_or_cache_pacer_cookies( :param password: The PACER password of the user :param client_code: The PACER client code of the user :param refresh: If True, refresh the cookies even if they're already cached - :return: Cookies for the PACER user + :return: A tuple containing the Request.CookieJar and the proxy address """ r = get_redis_interface("CACHE", decode_responses=False) - cookies = get_pacer_cookie_from_cache(user_pk, r=r) + cookies_data = get_pacer_cookie_from_cache(user_pk, r=r) ttl_seconds = r.ttl(session_key % user_pk) - if cookies and ttl_seconds >= 300 and not refresh: + if cookies_data and ttl_seconds >= 300 and not refresh: # cookies were found in cache and ttl >= 5 minutes, return them - return cookies + if isinstance(cookies_data, tuple): + return cookies_data + return cookies_data, settings.EGRESS_PROXY_HOST # Unable to find cookies in cache, are about to expire or refresh needed # Login and cache new values. - cookies = log_into_pacer(username, password, client_code) + cookies, proxy = log_into_pacer(username, password, client_code) cookie_expiration = 60 * 60 - r.set(session_key % user_pk, pickle.dumps(cookies), ex=cookie_expiration) - return cookies + r.set( + session_key % user_pk, + pickle.dumps((cookies, proxy)), + ex=cookie_expiration, + ) + return cookies, proxy def get_pacer_cookie_from_cache( From 8b3417e0e47ff7cb8446d67a2655c7a326dd26d1 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 15:45:50 -0400 Subject: [PATCH 044/372] feat(corpus_importer): Updates tasks logic and signature for improved cookie handling - Updates tasks logic to accommodate the new format for PACER user cookies. - Updates function signature to reflect the change in cookie data format. --- cl/corpus_importer/bulk_utils.py | 8 +- cl/corpus_importer/task_canvases.py | 18 +-- cl/corpus_importer/tasks.py | 184 ++++++++++++++++------------ cl/corpus_importer/tests.py | 4 +- 4 files changed, 125 insertions(+), 89 deletions(-) diff --git a/cl/corpus_importer/bulk_utils.py b/cl/corpus_importer/bulk_utils.py index 730a98b61d..711f768366 100644 --- a/cl/corpus_importer/bulk_utils.py +++ b/cl/corpus_importer/bulk_utils.py @@ -94,11 +94,11 @@ def get_petitions( logger.info(f"Sent {i} tasks to celery so far.") logger.info("Doing row %s", i) throttle.maybe_wait() - + cookies_data = pacer_session.cookies, pacer_session.proxy_address chain( - get_pacer_doc_by_rd.s( - rd_pk, pacer_session.cookies, tag=tag_petitions - ).set(queue=q), + get_pacer_doc_by_rd.s(rd_pk, cookies_data, tag=tag_petitions).set( + queue=q + ), extract_recap_pdf.si(rd_pk).set(queue=q), add_items_to_solr.si([rd_pk], "search.RECAPDocument").set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/task_canvases.py b/cl/corpus_importer/task_canvases.py index 143c061417..579d22eacb 100644 --- a/cl/corpus_importer/task_canvases.py +++ b/cl/corpus_importer/task_canvases.py @@ -18,7 +18,9 @@ from cl.search.tasks import add_or_update_recap_docket -def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q): +def get_docket_and_claims( + docket_number, court, case_name, cookies_data, tags, q +): """Get the docket report, claims history report, and save it all to the DB and Solr """ @@ -27,13 +29,13 @@ def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q): pass_through=None, docket_number=docket_number, court_id=court, - cookies=cookies, + cookies_data=cookies_data, case_name=case_name, docket_number_letters="bk", ).set(queue=q), 
get_docket_by_pacer_case_id.s( court_id=court, - cookies=cookies, + cookies_data=cookies_data, tag_names=tags, **{ "show_parties_and_counsel": True, @@ -41,9 +43,9 @@ def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q): "show_list_of_member_cases": False, } ).set(queue=q), - get_bankr_claims_registry.s(cookies=cookies, tag_names=tags).set( - queue=q - ), + get_bankr_claims_registry.s( + cookies_data=cookies_data, tag_names=tags + ).set(queue=q), add_or_update_recap_docket.s().set(queue=q), ).apply_async() @@ -72,7 +74,9 @@ def get_district_attachment_pages(options, rd_pks, tag_names, session): break throttle.maybe_wait() chain( - get_attachment_page_by_rd.s(rd_pk, session.cookies).set(queue=q), + get_attachment_page_by_rd.s( + rd_pk, (session.cookies, session.proxy_address) + ).set(queue=q), make_attachment_pq_object.s(rd_pk, recap_user.pk).set(queue=q), process_recap_attachment.s(tag_names=tag_names).set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 09b2a10526..982e097bf2 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -335,7 +335,7 @@ def get_and_save_free_document_report( :param end: a date object representing the last day to get results. :return: The status code of the scrape """ - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( "pacer_scraper", username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, @@ -344,6 +344,7 @@ def get_and_save_free_document_report( cookies=cookies, username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, + proxy=proxy_address, ) report = FreeOpinionReport(court_id, s) msg = "" @@ -605,14 +606,14 @@ def get_and_process_free_pdf( return None raise self.retry() - cookies = get_or_cache_pacer_cookies( + cookies_data = get_or_cache_pacer_cookies( "pacer_scraper", username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, ) try: r, r_msg = download_pacer_pdf_by_rd( - rd.pk, result.pacer_case_id, result.pacer_doc_id, cookies + rd.pk, result.pacer_case_id, result.pacer_doc_id, cookies_data ) except HTTPError as exc: if exc.response and exc.response.status_code in [ @@ -938,12 +939,12 @@ def get_pacer_case_id_and_title( pass_through: Any, docket_number: str, court_id: str, - cookies: Optional[RequestsCookieJar] = None, - user_pk: Optional[int] = None, - case_name: Optional[str] = None, - office_number: Optional[str] = None, - docket_number_letters: Optional[str] = None, -) -> Optional[TaskData]: + cookies_data: tuple[RequestsCookieJar, str] | None = None, + user_pk: int | None = None, + case_name: str | None = None, + office_number: str | None = None, + docket_number_letters: str | None = None, +) -> TaskData | None: """Get the pacer_case_id and title values for a district court docket. Use heuristics to disambiguate the results. @@ -959,8 +960,9 @@ def get_pacer_case_id_and_title( :param docket_number: The docket number to look up. This is a flexible field that accepts a variety of docket number styles. :param court_id: The CourtListener court ID for the docket number - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address (optional) + as a string. :param user_pk: The PK of a user making the request. This can be provided instead of the cookies parameter. 
If so, this will get the
    user's cookies from Redis instead of passing them in as an argument.
@@ -988,10 +990,22 @@ def get_pacer_case_id_and_title(
         docket_number,
         court_id,
     )
-    if not cookies:
-        # Get cookies from Redis if not provided
-        cookies = get_pacer_cookie_from_cache(user_pk)  # type: ignore
-    s = ProxyPacerSession(cookies=cookies)
+
+    if cookies_data:
+        cookies, proxy_address = cookies_data
+    elif user_pk:
+        cookies_from_cache = get_pacer_cookie_from_cache(user_pk)
+        if isinstance(cookies_from_cache, tuple):
+            cookies, proxy_address = cookies_from_cache
+        else:
+            cookies, proxy_address = cookies_from_cache, settings.EGRESS_PROXY_HOST
+    else:
+        # If the user_pk is unavailable, cookies cannot be retrieved from the
+        # cache.
+        self.request.chain = None
+        return None
+
+    s = ProxyPacerSession(cookies=cookies, proxy=proxy_address)
     report = PossibleCaseNumberApi(map_cl_to_pacer_id(court_id), s)
     msg = ""
     try:
@@ -1040,9 +1053,9 @@ def do_case_query_by_pacer_case_id(
     self: Task,
     data: TaskData,
     court_id: str,
-    cookies: RequestsCookieJar,
+    cookies_data: tuple[RequestsCookieJar, str],
     tag_names: List[str] | None = None,
-) -> Optional[TaskData]:
+) -> TaskData | None:
     """Run a case query (iquery.pl) query on a case and save the data

     :param self: The celery task
     saving it in the DB.
     :return: A dict with the pacer_case_id and docket_pk values.
     """
-    s = ProxyPacerSession(cookies=cookies)
+    cookies, proxy_address = cookies_data
+    s = ProxyPacerSession(cookies=cookies, proxy=proxy_address)
     if data is None:
         logger.info("Empty data argument. Terminating chains and exiting.")
         self.request.chain = None
@@ -1165,7 +1179,7 @@ def query_case_query_report(

     :return: The report.data.
     """
-    cookies = get_or_cache_pacer_cookies(
+    cookies, proxy_address = get_or_cache_pacer_cookies(
         "pacer_scraper",
         settings.PACER_USERNAME,
         password=settings.PACER_PASSWORD,
@@ -1174,6 +1188,7 @@ def query_case_query_report(
         cookies=cookies,
         username=settings.PACER_USERNAME,
         password=settings.PACER_PASSWORD,
+        proxy=proxy_address,
     )
     report = CaseQuery(map_cl_to_pacer_id(court_id), s)
     report.query(pacer_case_id)
@@ -1481,11 +1496,11 @@ def get_docket_by_pacer_case_id(
     self: Task,
     data: TaskData,
     court_id: str,
-    cookies: Optional[RequestsCookieJar] = None,
+    cookies_data: tuple[RequestsCookieJar, str],
     docket_pk: Optional[int] = None,
     tag_names: Optional[str] = None,
     **kwargs,
-) -> Optional[TaskData]:
+) -> TaskData | None:
     """Get a docket by PACER case id, CL court ID, and a collection of
     kwargs that can be passed to the DocketReport query.

     :param self: The celery task
     :param data: A dict containing:
         Required: 'pacer_case_id': The internal case ID of the item in PACER.
         Optional: 'docket_pk': The ID of the docket to work on to avoid
         lookups if it's known in advance.
     :param court_id: A courtlistener court ID.
-    :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a
-    logged-in PACER user.
+    :param cookies_data: A tuple containing the PACER user's cookies
+    (`requests.cookies.RequestsCookieJar`) and the proxy address used to login
+    as a string.
     :param docket_pk: The PK of the docket to update. Can also be provided
     in the data param, above.
:param tag_names: A list of tag names that should be stored with the item @@ -1532,7 +1548,8 @@ def get_docket_by_pacer_case_id( logging_id = f"{court_id}.{pacer_case_id}" logger.info("Querying docket report %s", logging_id) - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = DocketReport(map_cl_to_pacer_id(court_id), s) try: report.query(pacer_case_id, **kwargs) @@ -1583,7 +1600,7 @@ def get_appellate_docket_by_docket_number( self: Task, docket_number: str, court_id: str, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], tag_names: Optional[List[str]] = None, **kwargs, ) -> Optional[TaskData]: @@ -1595,13 +1612,15 @@ def get_appellate_docket_by_docket_number( :param self: The celery task :param docket_number: The docket number of the case. :param court_id: A courtlistener/PACER appellate court ID. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param tag_names: The tag name that should be stored with the item in the DB, if desired. :param kwargs: A variety of keyword args to pass to DocketReport.query(). """ - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = AppellateDocketReport(court_id, s) logging_id = f"{court_id} - {docket_number}" logger.info("Querying docket report %s", logging_id) @@ -1651,20 +1670,21 @@ def get_appellate_docket_by_docket_number( def get_att_report_by_rd( rd: RECAPDocument, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], ) -> Optional[AttachmentPage]: """Method to get the attachment report for the item in PACER. :param rd: The RECAPDocument object to use as a source. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-on PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :return: The attachment report populated with the results """ - if not rd.pacer_doc_id: return None - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) pacer_court_id = map_cl_to_pacer_id(rd.docket_entry.docket.court_id) att_report = AttachmentPage(pacer_court_id, s) att_report.query(rd.pacer_doc_id) @@ -1682,14 +1702,15 @@ def get_att_report_by_rd( def get_attachment_page_by_rd( self: Task, rd_pk: int, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], ) -> Optional[AttachmentPage]: """Get the attachment page for the item in PACER. :param self: The celery task :param rd_pk: The PK of a RECAPDocument object to use as a source. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-on PACER user. + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. 
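    A minimal sketch of the expected shape, assuming a logged-in
    ProxyPacerSession named `session`:

        cookies_data = (session.cookies, session.proxy_address)
        jar, proxy = cookies_data  # RequestsCookieJar, e.g. "http://proxy_1:9090"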
:return: The attachment report populated with the results """ rd = RECAPDocument.objects.get(pk=rd_pk) @@ -1698,7 +1719,7 @@ def get_attachment_page_by_rd( self.request.chain = None return None try: - att_report = get_att_report_by_rd(rd, cookies) + att_report = get_att_report_by_rd(rd, cookies_data) except HTTPError as exc: if exc.response and exc.response.status_code in [ HTTPStatus.INTERNAL_SERVER_ERROR, @@ -1736,21 +1757,23 @@ def get_attachment_page_by_rd( def get_bankr_claims_registry( self: Task, data: TaskData, - cookies: RequestsCookieJar, - tag_names: Optional[List[str]] = None, -) -> Optional[TaskData]: + cookies_data: tuple[RequestsCookieJar, str], + tag_names: List[str] | None = None, +) -> TaskData | None: """Get the bankruptcy claims registry for a docket :param self: The celery task :param data: A dict of data containing, primarily, a key to 'docket_pk' for the docket for which we want to get the registry. Other keys will be ignored. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param tag_names: A list of tag names that should be stored with the claims registry information in the DB. """ - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) if data is None or data.get("docket_pk") is None: logger.warning( "Empty data argument or parameter. Terminating chains " @@ -1848,8 +1871,8 @@ def download_pacer_pdf_by_rd( rd_pk: int, pacer_case_id: str, pacer_doc_id: int, - cookies: RequestsCookieJar, - magic_number: Optional[str] = None, + cookies_data: tuple[RequestsCookieJar, str], + magic_number: str | None = None, ) -> tuple[Response | None, str]: """Using a RECAPDocument object ID, download the PDF if it doesn't already exist. @@ -1857,18 +1880,19 @@ def download_pacer_pdf_by_rd( :param rd_pk: The PK of the RECAPDocument to download :param pacer_case_id: The internal PACER case ID number :param pacer_doc_id: The internal PACER document ID to download - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param magic_number: The magic number to fetch PACER documents for free this is an optional field, only used by RECAP Email documents :return: A two-tuple of requests.Response object usually containing a PDF, or None if that wasn't possible, and a string representing the error if there was one. """ - + cookies, proxy_address = cookies_data rd = RECAPDocument.objects.get(pk=rd_pk) pacer_court_id = map_cl_to_pacer_id(rd.docket_entry.docket.court_id) - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = FreeOpinionReport(pacer_court_id, s) r, r_msg = report.download_pdf(pacer_case_id, pacer_doc_id, magic_number) @@ -1880,7 +1904,7 @@ def download_pdf_by_magic_number( court_id: str, pacer_doc_id: str, pacer_case_id: str, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], magic_number: str, appellate: bool = False, ) -> tuple[Response | None, str]: @@ -1889,15 +1913,17 @@ def download_pdf_by_magic_number( :param court_id: A CourtListener court ID to query the free document. 
:param pacer_doc_id: The pacer_doc_id to query the free document. :param pacer_case_id: The pacer_case_id to query the free document. - :param cookies: The cookies of a logged in PACER session + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param magic_number: The magic number to fetch PACER documents for free. :param appellate: Whether the download belongs to an appellate court. :return: A two-tuple of requests.Response object usually containing a PDF, or None if that wasn't possible, and a string representing the error if there was one. """ - - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = FreeOpinionReport(court_id, s) r, r_msg = report.download_pdf( pacer_case_id, pacer_doc_id, magic_number, appellate @@ -1916,10 +1942,10 @@ def get_document_number_from_confirmation_page( """ recap_email_user = User.objects.get(username="recap-email") - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( recap_email_user.pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) doc_num_report = DownloadConfirmationPage(court_id, s) doc_num_report.query(pacer_doc_id) data = doc_num_report.data @@ -1990,11 +2016,10 @@ def is_pacer_doc_sealed(court_id: str, pacer_doc_id: str) -> bool: """ recap_email_user = User.objects.get(username="recap-email") - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( recap_email_user.pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) - - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) receipt_report = DownloadConfirmationPage(court_id, s) receipt_report.query(pacer_doc_id) data = receipt_report.data @@ -2021,11 +2046,11 @@ def is_docket_entry_sealed( return False recap_email_user = User.objects.get(username="recap-email") - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( recap_email_user.pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = BaseReport(court_id, s) return report.is_entry_sealed(case_id, doc_id) @@ -2128,14 +2153,16 @@ def add_tags(rd: RECAPDocument, tag_name: Optional[str]) -> None: def get_pacer_doc_by_rd( self: Task, rd_pk: int, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], tag: Optional[str] = None, ) -> Optional[int]: """A simple method for getting the PDF associated with a RECAPDocument. :param self: The bound celery task :param rd_pk: The PK for the RECAPDocument object - :param cookies: The cookies of a logged in PACER session + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. 
:param tag: The name of a tag to apply to any modified items :return: The RECAPDocument PK """ @@ -2148,7 +2175,7 @@ def get_pacer_doc_by_rd( pacer_case_id = rd.docket_entry.docket.pacer_case_id r, r_msg = download_pacer_pdf_by_rd( - rd.pk, pacer_case_id, rd.pacer_doc_id, cookies + rd.pk, pacer_case_id, rd.pacer_doc_id, cookies_data ) court_id = rd.docket_entry.docket.court_id @@ -2186,7 +2213,7 @@ def get_pacer_doc_by_rd_and_description( self: Task, rd_pk: int, description_re: Pattern, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], fallback_to_main_doc: bool = False, tag_name: Optional[List[str]] = None, ) -> None: @@ -2200,15 +2227,16 @@ def get_pacer_doc_by_rd_and_description( :param rd_pk: The PK of a RECAPDocument object to use as a source. :param description_re: A compiled regular expression to search against the description provided by the attachment page. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param fallback_to_main_doc: Should we grab the main doc if none of the attachments match the regex? :param tag_name: A tag name to apply to any downloaded content. :return: None """ rd = RECAPDocument.objects.get(pk=rd_pk) - att_report = get_attachment_page_by_rd(self, rd_pk, cookies) + att_report = get_attachment_page_by_rd(self, rd_pk, cookies_data) att_found = None for attachment in att_report.data.get("attachments", []): @@ -2257,7 +2285,7 @@ def get_pacer_doc_by_rd_and_description( pacer_case_id = rd.docket_entry.docket.pacer_case_id r, r_msg = download_pacer_pdf_by_rd( - rd.pk, pacer_case_id, att_found["pacer_doc_id"], cookies + rd.pk, pacer_case_id, att_found["pacer_doc_id"], cookies_data ) court_id = rd.docket_entry.docket.court_id @@ -2295,18 +2323,20 @@ def get_pacer_doc_by_rd_and_description( def get_pacer_doc_id_with_show_case_doc_url( self: Task, rd_pk: int, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], ) -> None: """use the show_case_doc URL to get pacer_doc_id values. :param self: The celery task :param rd_pk: The pk of the RECAPDocument you want to get. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. """ rd = RECAPDocument.objects.get(pk=rd_pk) d = rd.docket_entry.docket - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) pacer_court_id = map_cl_to_pacer_id(d.court_id) report = ShowCaseDocApi(pacer_court_id, s) last_try = self.request.retries == self.max_retries @@ -2396,7 +2426,7 @@ def make_list_of_creditors_key(court_id: str, d_number_file_name: str) -> str: @throttle_task("1/s", key="court_id") def query_and_save_list_of_creditors( self: Task, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], court_id: str, d_number_file_name: str, docket_number: str, @@ -2408,7 +2438,9 @@ def query_and_save_list_of_creditors( HTML and pipe-limited text files and convert them to CSVs. :param self: The celery task - :param cookies: The cookies for the current PACER session. 
+ :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param court_id: The court_id for the bankruptcy court. :param d_number_file_name: The docket number to use as file name. :param docket_number: The docket number of the case. @@ -2418,8 +2450,8 @@ def query_and_save_list_of_creditors( :return: None """ - - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) try: report = ListOfCreditors(court_id, s) except AssertionError: diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index fc745568d4..6291121f39 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -497,7 +497,7 @@ def test_party_parsing(self) -> None: @patch( "cl.corpus_importer.tasks.get_or_cache_pacer_cookies", - return_value=None, + return_value=(None, None), ) def test_get_and_save_free_document_report(self, mock_cookies) -> None: """Test the retrieval and storage of free document report data.""" @@ -3342,7 +3342,7 @@ def test_merger(self): @patch( "cl.corpus_importer.tasks.get_or_cache_pacer_cookies", - return_value=None, + return_value=(None, None), ) @override_settings( IQUERY_PROBE_DAEMON_ENABLED=True, From 588818d389295a47865f8ecea73e43749a40a4a6 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 15:47:09 -0400 Subject: [PATCH 045/372] feat(recap): Tweaks tasks to handle the new format for user cookies. --- cl/recap/tasks.py | 49 +++++++++++++++++++++++++++++++++++------------ cl/recap/tests.py | 8 ++++---- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/cl/recap/tasks.py b/cl/recap/tasks.py index c592024d98..6f32dbed06 100644 --- a/cl/recap/tasks.py +++ b/cl/recap/tasks.py @@ -1558,13 +1558,18 @@ def fetch_pacer_doc_by_rd( self.request.chain = None return + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) pacer_case_id = rd.docket_entry.docket.pacer_case_id try: r, r_msg = download_pacer_pdf_by_rd( rd.pk, pacer_case_id, rd.pacer_doc_id, - cookies, + cookies_data, magic_number, ) except (requests.RequestException, HTTPError): @@ -1656,8 +1661,13 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> None: mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED) return + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) try: - r = get_att_report_by_rd(rd, cookies) + r = get_att_report_by_rd(rd, cookies_data) except HTTPError as exc: msg = "Failed to get attachment page from network." 
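        # The `isinstance(cookies, tuple)` normalization above recurs in
        # several tasks touched by this patch. A small helper could collapse
        # the repetition; a sketch only, with a hypothetical name
        # (`to_cookies_data`) that is not part of this series:
        #
        #   def to_cookies_data(cached):
        #       """Normalize cached cookie data to a (jar, proxy) tuple."""
        #       if isinstance(cached, tuple):
        #           return cached
        #       return cached, settings.EGRESS_PROXY_HOST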
if exc.response.status_code in [ @@ -1829,14 +1839,19 @@ def fetch_docket(self, fq_pk): async_to_sync(mark_pq_status)(fq, "", PROCESSING_STATUS.IN_PROGRESS) - cookies = get_pacer_cookie_from_cache(fq.user_id) - if cookies is None: + cookies_data = get_pacer_cookie_from_cache(fq.user_id) + if cookies_data is None: msg = f"Cookie cache expired before task could run for user: {fq.user_id}" mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED) self.request.chain = None return None - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = ( + cookies_data + if isinstance(cookies_data, tuple) + else (cookies_data, settings.EGRESS_PROXY_HOST) + ) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) try: result = fetch_pacer_case_id_and_title(s, fq, court_id) except (requests.RequestException, ReadTimeoutError) as exc: @@ -2075,7 +2090,7 @@ def save_pacer_doc_from_pq( def download_pacer_pdf_and_save_to_pq( court_id: str, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], cutoff_date: datetime, magic_number: str | None, pacer_case_id: str, @@ -2091,7 +2106,7 @@ def download_pacer_pdf_and_save_to_pq( PQ object. Increasing the reliability of saving PACER documents. :param court_id: A CourtListener court ID to query the free document. - :param cookies: The cookies of a logged in PACER session + :param cookies_data: The cookies of a logged in PACER session :param cutoff_date: The datetime from which we should query ProcessingQueue objects. For the main RECAPDocument the datetime the EmailProcessingQueue was created. For attachments the datetime the @@ -2128,7 +2143,7 @@ def download_pacer_pdf_and_save_to_pq( court_id, pacer_doc_id, pacer_case_id, - cookies, + cookies_data, magic_number, appellate, ) @@ -2175,13 +2190,18 @@ def get_and_copy_recap_attachment_docs( """ cookies = get_pacer_cookie_from_cache(user_pk) + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) appellate = False unique_pqs = [] for rd_att in att_rds: cutoff_date = rd_att.date_created pq = download_pacer_pdf_and_save_to_pq( court_id, - cookies, + cookies_data, cutoff_date, magic_number, pacer_case_id, @@ -2286,6 +2306,11 @@ def get_and_merge_rd_attachments( all_attachment_rds = [] cookies = get_pacer_cookie_from_cache(user_pk) + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) # Try to get the attachment page without being logged into PACER att_report_text = get_attachment_page_by_url(document_url, court_id) if att_report_text: @@ -2297,7 +2322,7 @@ def get_and_merge_rd_attachments( .recap_documents.earliest("date_created") ) # Get the attachment page being logged into PACER - att_report = get_att_report_by_rd(main_rd, cookies) + att_report = get_att_report_by_rd(main_rd, cookies_data) for docket_entry in dockets_updated: # Merge the attachments for each docket/recap document @@ -2383,7 +2408,7 @@ def process_recap_email( start_time = now() # Ensures we have PACER cookies ready to go. - cookies = get_or_cache_pacer_cookies( + cookies_data = get_or_cache_pacer_cookies( user_pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) appellate = data["appellate"] @@ -2391,7 +2416,7 @@ def process_recap_email( # its future processing. 
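    # Unlike the raw cache reads handled above, get_or_cache_pacer_cookies()
    # always returns the new (cookie_jar, proxy) tuple, so no isinstance()
    # normalization is needed before passing cookies_data along.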
pq = download_pacer_pdf_and_save_to_pq( epq.court_id, - cookies, + cookies_data, epq.date_created, magic_number, pacer_case_id, diff --git a/cl/recap/tests.py b/cl/recap/tests.py index c01961b029..f04ee7e18e 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -1205,7 +1205,7 @@ async def test_missing_receipt_properties_fails(self): ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: None, + side_effect=lambda x, y, z: (None, None), ) @mock.patch( "cl.recap.tasks.is_docket_entry_sealed", @@ -2857,7 +2857,7 @@ def test_create_from_idb_chunk(self) -> None: ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: None, + side_effect=lambda x, y, z: (None, None), ) @mock.patch( "cl.recap.tasks.is_pacer_court_accessible", @@ -5315,7 +5315,7 @@ def test_clean_up_recap_document_file(self, mock_open): ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: "Cookie", + side_effect=lambda x, y, z: ("Cookie", settings.EGRESS_PROXY_HOST), ) @mock.patch( "cl.recap.tasks.get_pacer_cookie_from_cache", @@ -5689,7 +5689,7 @@ def test_is_pacer_court_accessible_fails( ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: None, + side_effect=lambda x, y, z: (None, None), ) @mock.patch( "cl.recap.tasks.is_pacer_court_accessible", From 84894190c38e026545fcefe73bd1b82e421f03c5 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 15:51:49 -0400 Subject: [PATCH 046/372] feat(scrapers): Updates update_docket_info_iquery task - Adapts update_docket_info_iquery task to handle the updated output format of get_or_cache_pacer_cookies. - Enhances task functionality by passing the proxy parameter to the ProxyPacerSession class --- cl/scrapers/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cl/scrapers/tasks.py b/cl/scrapers/tasks.py index 504db45df9..d4a7fd2cc2 100644 --- a/cl/scrapers/tasks.py +++ b/cl/scrapers/tasks.py @@ -404,7 +404,7 @@ def update_docket_info_iquery(self, d_pk: int, court_id: str) -> None: :param court_id: The court of the docket. Needed for throttling by court. 
:return: None """ - cookies = get_or_cache_pacer_cookies( + cookies, proxy = get_or_cache_pacer_cookies( "pacer_scraper", settings.PACER_USERNAME, password=settings.PACER_PASSWORD, @@ -413,6 +413,7 @@ def update_docket_info_iquery(self, d_pk: int, court_id: str) -> None: cookies=cookies, username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, + proxy=proxy, ) d = Docket.objects.get(pk=d_pk, court_id=court_id) report = CaseQuery(map_cl_to_pacer_id(d.court_id), s) From b93fd90519c3423c59b4306541e07d97a14c1378 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 16:55:02 -0400 Subject: [PATCH 047/372] feat(corpus_importer): Updates commands to use the new cookie format --- .../management/commands/760_project.py | 6 +++--- .../management/commands/adelman_david.py | 8 ++++---- .../management/commands/buchwald_project.py | 4 ++-- .../management/commands/buried_alive_project.py | 5 ++++- .../management/commands/everything_project.py | 4 ++-- .../management/commands/export_control.py | 2 +- .../management/commands/import_patent.py | 6 +++--- .../management/commands/invoice_project.py | 14 ++++++++------ .../management/commands/jackson_project.py | 2 +- .../management/commands/kessler_ilnb.py | 14 +++++++++++--- .../management/commands/legal_robot.py | 6 +++--- .../commands/list_of_creditors_project.py | 2 +- cl/corpus_importer/management/commands/nos_700.py | 4 ++-- .../management/commands/nywb_chapter_7.py | 10 ++++++++-- .../management/commands/merge_idb_into_dockets.py | 2 +- 15 files changed, 54 insertions(+), 35 deletions(-) diff --git a/cl/corpus_importer/management/commands/760_project.py b/cl/corpus_importer/management/commands/760_project.py index b4a227f0aa..37c5785eef 100644 --- a/cl/corpus_importer/management/commands/760_project.py +++ b/cl/corpus_importer/management/commands/760_project.py @@ -55,7 +55,7 @@ def get_dockets(options): get_appellate_docket_by_docket_number.s( docket_number=row["Cleaned case_No"], court_id=row["fjc_court_id"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[TAG], **{ "show_docket_entries": True, @@ -75,12 +75,12 @@ def get_dockets(options): pass_through=None, docket_number=row["Cleaned case_No"], court_id=row["fjc_court_id"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), case_name=row["Title"], ).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row["fjc_court_id"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[TAG], **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/adelman_david.py b/cl/corpus_importer/management/commands/adelman_david.py index f24f58cae3..2844e4ff2b 100644 --- a/cl/corpus_importer/management/commands/adelman_david.py +++ b/cl/corpus_importer/management/commands/adelman_david.py @@ -48,7 +48,7 @@ def download_dockets(options): get_appellate_docket_by_docket_number.s( docket_number=row["docket_no1"], court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[PROJECT_TAG_NAME, row_tag], # Do not get the docket entries for now. We're only # interested in the date terminated. 
If it's an open case, @@ -71,17 +71,17 @@ def download_dockets(options): pass_through=None, docket_number=row["docket_no1"], court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), case_name=row["name"], ).set(queue=q), do_case_query_by_pacer_case_id.s( court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[PROJECT_TAG_NAME, row_tag], ).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[PROJECT_TAG_NAME, row_tag], **{ # No docket entries diff --git a/cl/corpus_importer/management/commands/buchwald_project.py b/cl/corpus_importer/management/commands/buchwald_project.py index 7beb4865af..6b6dbba000 100644 --- a/cl/corpus_importer/management/commands/buchwald_project.py +++ b/cl/corpus_importer/management/commands/buchwald_project.py @@ -59,7 +59,7 @@ def add_all_nysd_to_cl(options): throttle.maybe_wait() logger.info("Doing pacer_case_id: %s", pacer_case_id) make_docket_by_iquery.apply_async( - args=("nysd", pacer_case_id, session.cookies, [NYSD_TAG]), + args=("nysd", pacer_case_id, "default", [NYSD_TAG]), queue=q, ) @@ -104,7 +104,7 @@ def get_dockets(options): get_docket_by_pacer_case_id.s( data={"pacer_case_id": d.pacer_case_id}, court_id=d.court_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), docket_pk=d.pk, tag_names=[BUCKWALD_TAG], **{ diff --git a/cl/corpus_importer/management/commands/buried_alive_project.py b/cl/corpus_importer/management/commands/buried_alive_project.py index 880176072e..f84ee6a16a 100644 --- a/cl/corpus_importer/management/commands/buried_alive_project.py +++ b/cl/corpus_importer/management/commands/buried_alive_project.py @@ -64,7 +64,10 @@ def get_pacer_dockets(options, docket_pks, tags): get_docket_by_pacer_case_id.s( {"pacer_case_id": d.pacer_case_id, "docket_pk": d.pk}, d.court_id, - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), tag_names=tags, **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/everything_project.py b/cl/corpus_importer/management/commands/everything_project.py index 3ea7d27eb2..a74b9328ff 100644 --- a/cl/corpus_importer/management/commands/everything_project.py +++ b/cl/corpus_importer/management/commands/everything_project.py @@ -141,13 +141,13 @@ def get_dockets(options, items, tags, sample_size=0, doc_num_end=""): pass_through=None, docket_number=row.docket_number, court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), filter_docket_by_tags.s(tags, row.district_id).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=tags, **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/export_control.py b/cl/corpus_importer/management/commands/export_control.py index da434bd83f..518f22a61c 100644 --- a/cl/corpus_importer/management/commands/export_control.py +++ b/cl/corpus_importer/management/commands/export_control.py @@ -85,7 +85,7 @@ def get_data(options, row_transform, tags): row["docket_number"], row["court"], row["case_name"], - session.cookies, + (session.cookies, session.proxy_address), tags, q, ) diff --git 
a/cl/corpus_importer/management/commands/import_patent.py b/cl/corpus_importer/management/commands/import_patent.py index f207f649ab..1762126457 100644 --- a/cl/corpus_importer/management/commands/import_patent.py +++ b/cl/corpus_importer/management/commands/import_patent.py @@ -78,12 +78,12 @@ def get_dockets(options: dict) -> None: pass_through=None, docket_number=item.docket_number, court_id=item.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), get_docket_by_pacer_case_id.s( court_id=item.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=PATENT_TAGS, **{ "show_parties_and_counsel": True, @@ -101,7 +101,7 @@ def get_dockets(options: dict) -> None: get_docket_by_pacer_case_id.s( data={"pacer_case_id": d.pacer_case_id}, court_id=d.court_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), docket_pk=d.pk, tag_names=PATENT_TAGS, **{ diff --git a/cl/corpus_importer/management/commands/invoice_project.py b/cl/corpus_importer/management/commands/invoice_project.py index 8f3f889c34..d0195491ea 100644 --- a/cl/corpus_importer/management/commands/invoice_project.py +++ b/cl/corpus_importer/management/commands/invoice_project.py @@ -83,9 +83,9 @@ def get_attachment_pages(options): throttle.maybe_wait() chain( # Query the attachment page and process it - get_attachment_page_by_rd.s(result["id"], session.cookies).set( - queue=q - ), + get_attachment_page_by_rd.s( + result["id"], (session.cookies, session.proxy_address) + ).set(queue=q), # Take that in a new task and make a PQ object make_attachment_pq_object.s(result["id"], recap_user.pk).set( queue=q @@ -150,9 +150,11 @@ def get_documents(options): continue chain( - get_pacer_doc_by_rd.s(rd.pk, session.cookies, tag=TAG_PHASE_2).set( - queue=q - ), + get_pacer_doc_by_rd.s( + rd.pk, + (session.cookies, session.proxy_address), + tag=TAG_PHASE_2, + ).set(queue=q), extract_recap_pdf.si(rd.pk).set(queue=q), add_items_to_solr.si([rd.pk], "search.RECAPDocument").set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/management/commands/jackson_project.py b/cl/corpus_importer/management/commands/jackson_project.py index 1e7fd98e3b..f4b420de3f 100644 --- a/cl/corpus_importer/management/commands/jackson_project.py +++ b/cl/corpus_importer/management/commands/jackson_project.py @@ -41,7 +41,7 @@ def get_dockets(options): get_docket_by_pacer_case_id.s( data={"pacer_case_id": d.pacer_case_id}, court_id=d.court_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), docket_pk=d.pk, tag_names=[JACKSON_TAG], **{ diff --git a/cl/corpus_importer/management/commands/kessler_ilnb.py b/cl/corpus_importer/management/commands/kessler_ilnb.py index a3ad701b23..d70df6e92e 100644 --- a/cl/corpus_importer/management/commands/kessler_ilnb.py +++ b/cl/corpus_importer/management/commands/kessler_ilnb.py @@ -60,13 +60,19 @@ def get_dockets(options): row["docket"], row["office"] ), court_id="ilnb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), office_number=row["office"], docket_number_letters="bk", ).set(queue=q), get_docket_by_pacer_case_id.s( court_id="ilnb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), tag_names=[TAG], **{ "show_parties_and_counsel": True, @@ -118,7 +124,9 @@ def get_final_docs(options): throttle.maybe_wait() chain( 
get_pacer_doc_by_rd.s( - rd_pk, pacer_session.cookies, tag=TAG_FINALS + rd_pk, + (pacer_session.cookies, pacer_session.proxy_address), + tag=TAG_FINALS, ).set(queue=q), extract_recap_pdf.si(rd_pk).set(queue=q), add_items_to_solr.si([rd_pk], "search.RECAPDocument").set( diff --git a/cl/corpus_importer/management/commands/legal_robot.py b/cl/corpus_importer/management/commands/legal_robot.py index d6bc38244f..f37685f470 100644 --- a/cl/corpus_importer/management/commands/legal_robot.py +++ b/cl/corpus_importer/management/commands/legal_robot.py @@ -79,9 +79,9 @@ def get_documents(options): continue chain( - get_pacer_doc_by_rd.s(rd.pk, session.cookies, tag=TAG).set( - queue=q - ), + get_pacer_doc_by_rd.s( + rd.pk, (session.cookies, session.proxy_address), tag=TAG + ).set(queue=q), extract_recap_pdf.si(rd.pk).set(queue=q), add_items_to_solr.si([rd.pk], "search.RECAPDocument").set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/management/commands/list_of_creditors_project.py b/cl/corpus_importer/management/commands/list_of_creditors_project.py index 9783903212..83482110b7 100644 --- a/cl/corpus_importer/management/commands/list_of_creditors_project.py +++ b/cl/corpus_importer/management/commands/list_of_creditors_project.py @@ -139,7 +139,7 @@ def query_and_save_creditors_data(options: OptionsType) -> None: ) throttle.maybe_wait() query_and_save_list_of_creditors.si( - session.cookies, + (session.cookies, session.proxy_address), court_id, d_number_file_name, docket_number, diff --git a/cl/corpus_importer/management/commands/nos_700.py b/cl/corpus_importer/management/commands/nos_700.py index 915c030eef..600cf8188f 100644 --- a/cl/corpus_importer/management/commands/nos_700.py +++ b/cl/corpus_importer/management/commands/nos_700.py @@ -257,13 +257,13 @@ def get_dockets(options, items, tags, sample_size=0): pass_through=None, docket_number=row.docket_number, court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), filter_docket_by_tags.s(tags, row.district_id).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=tags, **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/nywb_chapter_7.py b/cl/corpus_importer/management/commands/nywb_chapter_7.py index 7efa9888fa..d66dd7027a 100644 --- a/cl/corpus_importer/management/commands/nywb_chapter_7.py +++ b/cl/corpus_importer/management/commands/nywb_chapter_7.py @@ -55,13 +55,19 @@ def get_dockets(options): row["DOCKET"], row["OFFICE"] ), court_id="nywb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), office_number=row["OFFICE"], docket_number_letters="bk", ).set(queue=q), get_docket_by_pacer_case_id.s( court_id="nywb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), tag_names=[TAG], **{ "doc_num_start": 1, diff --git a/cl/recap/management/commands/merge_idb_into_dockets.py b/cl/recap/management/commands/merge_idb_into_dockets.py index 0fe62e0c85..e8a741020d 100644 --- a/cl/recap/management/commands/merge_idb_into_dockets.py +++ b/cl/recap/management/commands/merge_idb_into_dockets.py @@ -142,7 +142,7 @@ def update_any_missing_pacer_case_ids(options): pass_through=d.pk, docket_number=d.idb_data.docket_number, court_id=d.idb_data.district_id, - cookies=session.cookies, + 
cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), update_docket_from_hidden_api.s().set(queue=q), From 693552db948b1a1e6971ade141d02bc53ae64315 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 18:42:07 -0400 Subject: [PATCH 048/372] feat(lib): Adds tests for pacer session utils --- cl/lib/tests.py | 132 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/cl/lib/tests.py b/cl/lib/tests.py index 6507fab826..ff86a720e3 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -1,8 +1,13 @@ import datetime +import pickle from typing import Tuple, TypedDict, cast +from unittest.mock import patch from asgiref.sync import async_to_sync +from django.conf import settings from django.core.files.base import ContentFile +from django.test import override_settings +from requests.cookies import RequestsCookieJar from cl.lib.date_time import midnight_pt from cl.lib.elasticsearch_utils import append_query_conjunctions @@ -21,6 +26,11 @@ normalize_attorney_role, normalize_us_state, ) +from cl.lib.pacer_session import ( + ProxyPacerSession, + get_or_cache_pacer_cookies, + session_key, +) from cl.lib.privacy_tools import anonymize from cl.lib.ratelimiter import parse_rate from cl.lib.redis_utils import ( @@ -80,6 +90,128 @@ def test_auto_blocking_small_bankr_docket(self) -> None: ) +class TestPacerSessionUtils(TestCase): + + def setUp(self) -> None: + r = get_redis_interface("CACHE", decode_responses=False) + self.test_cookies = RequestsCookieJar() + self.test_cookies.set("PacerSession", "this-is-a-test") + r.set( + session_key % "test_user_old_format", + pickle.dumps(self.test_cookies), + ex=60 * 60, + ) + r.set( + session_key % "test_user_new_format", + pickle.dumps((self.test_cookies, "http://proxy_1:9090")), + ex=60 * 60, + ) + r.set( + session_key % "test_old_format_almost_expired", + pickle.dumps(self.test_cookies), + ex=60, + ) + r.set( + session_key % "test_new_format_almost_expired", + pickle.dumps((self.test_cookies, "http://proxy_1:9090")), + ex=60, + ) + + def test_use_default_proxy_if_list_not_available(self) -> None: + """Does ProxyPacerSession uses the default proxy when no list is provided?""" + session = ProxyPacerSession(username="test", password="password") + self.assertEqual(session.proxy_address, settings.EGRESS_PROXY_HOST) + + @override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] + ) + def test_pick_random_proxy_when_list_is_available(self): + """Does ProxyPacerSession choose a random proxy from the available list?""" + session = ProxyPacerSession(username="test", password="password") + self.assertNotEqual(session.proxy_address, settings.EGRESS_PROXY_HOST) + self.assertIn( + session.proxy_address, + ["http://proxy_1:9090", "http://proxy_2:9090"], + ) + + def test_use_default_proxy_host_for_old_cookie_format(self): + """Can we handle the old cookie format properly?""" + cookies_data = get_or_cache_pacer_cookies( + "test_user_old_format", username="test", password="password" + ) + self.assertIsInstance(cookies_data, tuple) + _, proxy = cookies_data + self.assertEqual(proxy, settings.EGRESS_PROXY_HOST) + + @override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] + ) + @patch("cl.lib.pacer_session.log_into_pacer") + def test_compute_new_cookies_with_new_format(self, mock_log_into_pacer): + """Are we using the tuple format for new cookies?""" + mock_log_into_pacer.return_value = ( + self.test_cookies, + "http://proxy_1:9090", + ) + 
cookies_data = get_or_cache_pacer_cookies( + "test_user_new_cookie", username="test", password="password" + ) + self.assertIsInstance(cookies_data, tuple) + _, proxy = cookies_data + self.assertEqual(proxy, "http://proxy_1:9090") + + def test_parse_cookie_proxy_pair_properly(self): + """Can we parse the tuple format from cache properly?""" + cookies_data = get_or_cache_pacer_cookies( + "test_user_new_format", username="test", password="password" + ) + self.assertIsInstance(cookies_data, tuple) + _, proxy = cookies_data + self.assertEqual(proxy, "http://proxy_1:9090") + + @override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] + ) + @patch("cl.lib.pacer_session.log_into_pacer") + def test_compute_cookies_for_almost_expired_data( + self, mock_log_into_pacer + ): + """Are we using the tuple format when re-computing session?""" + mock_log_into_pacer.return_value = ( + self.test_cookies, + "http://proxy_1:9090", + ) + + # Attempts to get almost expired cookies with the old format from cache + # Expects refresh. + cookies = get_or_cache_pacer_cookies( + "test_old_format_almost_expired", + username="test", + password="password", + ) + self.assertIsInstance(cookies, tuple) + _, proxy = cookies + self.assertEqual(mock_log_into_pacer.call_count, 1) + self.assertEqual(proxy, "http://proxy_1:9090") + + mock_log_into_pacer.return_value = ( + self.test_cookies, + "http://proxy_2:9090", + ) + + # Attempts to get almost expired cookies with the new format from cache + # Expects refresh. + cookies = get_or_cache_pacer_cookies( + "test_new_format_almost_expired", + username="test", + password="password", + ) + self.assertIsInstance(cookies, tuple) + _, proxy = cookies + self.assertEqual(mock_log_into_pacer.call_count, 2) + self.assertEqual(proxy, "http://proxy_2:9090") + + class TestStringUtils(SimpleTestCase): def test_trunc(self) -> None: """Does trunc give us the results we expect?""" From 5a47599f92b6437929d735197ab2f24492331e9f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 12:20:15 -0400 Subject: [PATCH 049/372] feat(pacer_session): Updates logic to pick a proxy connection str --- cl/lib/pacer_session.py | 9 ++------- cl/lib/tests.py | 17 +++-------------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index a7543ad7b4..2fa6094060 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -47,17 +47,12 @@ def _pick_proxy_connection(self) -> str: """ Picks a proxy connection string from available options. - If the `settings.EGRESS_PROXY_HOSTS` list is empty, this function - returns the value from `settings.EGRESS_PROXY_HOST`. Otherwise, it - randomly chooses a string from the `settings.EGRESS_PROXY_HOSTS` list - and returns it. + this function randomly chooses a string from the + `settings.EGRESS_PROXY_HOSTS` list and returns it. Returns: str: The chosen proxy connection string. 
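        A short usage sketch (hosts are illustrative, mirroring the tests):

            settings.EGRESS_PROXY_HOSTS = ["http://proxy_1:9090", "http://proxy_2:9090"]
            session = ProxyPacerSession(username="u", password="p")
            session.proxy_address  # one of the two hosts, via random.choice()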
""" - if not settings.EGRESS_PROXY_HOSTS: - return settings.EGRESS_PROXY_HOST - return random.choice(settings.EGRESS_PROXY_HOSTS) def _change_protocol(self, url: str) -> str: diff --git a/cl/lib/tests.py b/cl/lib/tests.py index ff86a720e3..da59a87964 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -90,6 +90,9 @@ def test_auto_blocking_small_bankr_docket(self) -> None: ) +@override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] +) class TestPacerSessionUtils(TestCase): def setUp(self) -> None: @@ -117,14 +120,6 @@ def setUp(self) -> None: ex=60, ) - def test_use_default_proxy_if_list_not_available(self) -> None: - """Does ProxyPacerSession uses the default proxy when no list is provided?""" - session = ProxyPacerSession(username="test", password="password") - self.assertEqual(session.proxy_address, settings.EGRESS_PROXY_HOST) - - @override_settings( - EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] - ) def test_pick_random_proxy_when_list_is_available(self): """Does ProxyPacerSession choose a random proxy from the available list?""" session = ProxyPacerSession(username="test", password="password") @@ -143,9 +138,6 @@ def test_use_default_proxy_host_for_old_cookie_format(self): _, proxy = cookies_data self.assertEqual(proxy, settings.EGRESS_PROXY_HOST) - @override_settings( - EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] - ) @patch("cl.lib.pacer_session.log_into_pacer") def test_compute_new_cookies_with_new_format(self, mock_log_into_pacer): """Are we using the tuple format for new cookies?""" @@ -169,9 +161,6 @@ def test_parse_cookie_proxy_pair_properly(self): _, proxy = cookies_data self.assertEqual(proxy, "http://proxy_1:9090") - @override_settings( - EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] - ) @patch("cl.lib.pacer_session.log_into_pacer") def test_compute_cookies_for_almost_expired_data( self, mock_log_into_pacer From b0b6eb36af8097cab578e7ab5c843a823bc6516b Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 13:52:52 -0400 Subject: [PATCH 050/372] feat(corpus_importer): Adds an exception to the get_pacer_case_id_and_title helper --- cl/corpus_importer/tasks.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 982e097bf2..1711fd8104 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -999,10 +999,9 @@ def get_pacer_case_id_and_title( cookies, proxy_address = cookies_from_cache cookies, proxy_address = cookies_from_cache, settings.EGRESS_PROXY_HOST else: - # If the user_pk is unavailable, cookies cannot be retrieved from the - # cache. 
- self.request.chain = None - return None + raise Exception( + "user_pk is unavailable, cookies cannot be retrieved from cache" + ) s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = PossibleCaseNumberApi(map_cl_to_pacer_id(court_id), s) From 02141d97af0b1a8bd7e3447d053d56f168a09f7a Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 15:17:30 -0400 Subject: [PATCH 051/372] feat(test): Override PROXY_HOSTS setting for tests --- cl/corpus_importer/tests.py | 3 +++ cl/recap/tests.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 6291121f39..0ff43d865d 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -401,6 +401,9 @@ def test_get_appellate_court_object_from_string(self) -> None: self.assertEqual(test["a"], got) +@override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] +) @pytest.mark.django_db class PacerDocketParserTest(TestCase): """Can we parse RECAP dockets successfully?""" diff --git a/cl/recap/tests.py b/cl/recap/tests.py index f04ee7e18e..ab63750a59 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -16,7 +16,7 @@ from django.core import mail from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.test import RequestFactory +from django.test import RequestFactory, override_settings from django.urls import reverse from django.utils.timezone import now from juriscraper.pacer import PacerRssFeed @@ -1145,6 +1145,9 @@ def mock_bucket_open(message_id, r, read_file=False): return recap_mail_example +@override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] +) class RecapEmailToEmailProcessingQueueTest(TestCase): """Test the rest endpoint, but exclude the processing tasks.""" From f997bcccf160b854aa6860148e5609c8725805e9 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 16:02:24 -0400 Subject: [PATCH 052/372] fix(tests): Override PROXY_HOSTS setting for the ScrapeIqueryPagesTest class --- cl/corpus_importer/tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 9d388f7fb8..13bf5022fc 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -3351,6 +3351,7 @@ def test_merger(self): @override_settings( IQUERY_PROBE_DAEMON_ENABLED=True, IQUERY_SWEEP_UPLOADS_SIGNAL_ENABLED=True, + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"], ) class ScrapeIqueryPagesTest(TestCase): """Tests related to probe_iquery_pages_daemon command.""" From fb4c668a264319733f9dfb07b2f98a0bf4e18c55 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 12 Jul 2024 15:31:49 -0600 Subject: [PATCH 053/372] feat(scrape_pacer_free_opinions): sweep options --- .../commands/scrape_pacer_free_opinions.py | 120 ++++++++++++++++-- 1 file changed, 110 insertions(+), 10 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index 1b40006ab2..7c0e0d2d52 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -1,4 +1,5 @@ import argparse +import calendar import datetime import os from typing import Callable, Dict, List, Optional, Tuple, cast @@ -6,6 +7,7 @@ from celery.canvas import chain from django.conf import settings 
from django.utils.timezone import now +from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.exceptions import PacerLoginException from juriscraper.lib.string_utils import CaseNameTweaker from requests import RequestException @@ -326,19 +328,116 @@ def ocr_available(options: OptionsType) -> None: logger.info(f"Sent {i + 1}/{count} tasks to celery so far.") -def do_monthly(): - # Run everything monthly range - pass +def do_quarterly(options: OptionsType): + """Collect last quarter documents + Run it every three months (0 0 1 */3 *) -def do_weekly(): - # Run everything weekly range - pass + :return: None + """ + first_day_current_month = datetime.datetime.now().replace(day=1) + + # Calculate the first day of the month three months ago + if first_day_current_month.month <= 3: + start_year = first_day_current_month.year - 1 + start_month = first_day_current_month.month + 9 + else: + start_year = first_day_current_month.year + start_month = first_day_current_month.month - 3 + start_date = datetime.date(start_year, start_month, 1) + + # Calculate the last day of the month prior to today + last_month = first_day_current_month - datetime.timedelta(days=1) + end_day = calendar.monthrange(last_month.year, last_month.month)[1] + end_date = datetime.date(last_month.year, last_month.month, end_day) + + dates = make_date_range_tuples(start_date, end_date, gap=7) + + for _start, _end in dates: + # We run this in 7-day date ranges to ingest all the information on a weekly + # basis and not wait for all the responses from three months ago to now from + # each court. This also allows us to scrape each court every 7 day range to + # avoid possible blockages. + options["date_start"] = _start # type: ignore + options["date_end"] = _end # type: ignore + do_everything(options) + + +def do_monthly(options: OptionsType): + """Collect last month's documents + + Run it on the 3rd of each month to let them update the last days of the month + (15 2 3 * *) + + :return: None + """ + today = datetime.date.today() + prev_month, current_year = ( + (today.month - 1, today.year) + if today.month != 1 + else (12, today.year - 1) + ) + month_last_day = calendar.monthrange(current_year, prev_month)[1] + start = datetime.date(current_year, prev_month, 1) + end = datetime.date(current_year, prev_month, month_last_day) + + # Update options with start and end date of previous month + options["date_start"] = start # type: ignore + options["date_end"] = end # type: ignore + + do_everything(options) + + +def do_weekly(options: OptionsType): + """Collect last week's documents + + Run it every wednesday (* * * * 3) + + :return: None + """ + + today = datetime.date.today() + weekday = today.weekday() + start_of_this_week = today - datetime.timedelta(days=weekday) + start_of_previous_week = start_of_this_week - datetime.timedelta(weeks=1) + end_of_previous_week = start_of_previous_week + datetime.timedelta(days=6) + + # Update options with start and end date of previous week + options["date_start"] = start_of_previous_week # type: ignore + options["date_end"] = end_of_previous_week # type: ignore + + do_everything(options) + + +def do_all(options: OptionsType): + """Collect all documents since the beginning of time + + It was established on this date based on the PacerFreeDocumentLog table. The first + date queried is 1950-05-12 from ca9. + + The command will be executed until the day on which it is executed. 
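    For reference, make_date_range_tuples splits the span into inclusive
    7-day windows, roughly (a sketch of juriscraper's helper, assuming its
    documented gap behavior):

        make_date_range_tuples(date(1950, 5, 1), date(1950, 5, 15), gap=7)
        # -> [(May 1, May 7), (May 8, May 14), (May 15, May 15)]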
+ + To collect all documents, weekly, monthly and quarterly sweeps will be used to make + sure we don't miss anything. + + Take note that documents could be missing if they were marked as free after these + periods. + + :return: None + """ + start = datetime.date(1950, 5, 1) + end = datetime.date.today() + dates = make_date_range_tuples(start, end, gap=7) -def do_all(): - # run all courts since first day started to query each court - pass + for _start, _end in dates: + # We run this in 7-day date ranges to ingest all the information on a weekly + # basis and not wait for all the responses from 1950 to now from each court ( + # ~3900 weeks/requests until today). This also allows us to scrape each court + # every 7 day range to avoid possible blockages. + options["date_start"] = _start # type: ignore + options["date_end"] = _end # type: ignore + do_everything(options) def do_everything(options: OptionsType): @@ -387,7 +486,7 @@ def add_arguments(self, parser: argparse.ArgumentParser) -> None: type=str, default=["all"], nargs="*", - help="The courts that you wish to parse.", + help="The courts that you wish to parse. Use cl ids.", ) parser.add_argument( "--date-start", @@ -421,6 +520,7 @@ def handle(self, *args: List[str], **options: OptionsType) -> None: "get-report-results": get_and_save_free_document_reports, "get-pdfs": get_pdfs, "ocr-available": ocr_available, + "do-quarterly": do_quarterly, "do-monthly": do_monthly, "do-weekly": do_weekly, "do-all": do_all, From a3011bb168e1d1dd1234d3e943d4a250c2269428 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 12 Jul 2024 16:14:17 -0600 Subject: [PATCH 054/372] feat(citation_importer): catch invalid volume numbers Fix start-end/end-row options Add a log message when we already have a citation from a reporter use logger instead of print --- .../commands/import_citations_csv.py | 27 +++++++++++++++---- cl/corpus_importer/utils.py | 3 +++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py index c709ab710e..f23649eeb6 100644 --- a/cl/citations/management/commands/import_citations_csv.py +++ b/cl/citations/management/commands/import_citations_csv.py @@ -38,9 +38,15 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: end_row = None if options["start_row"] and options["end_row"]: - start_row = options["start_row"] if options["start_row"] > 1 else 0 + start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 end_row = options["end_row"] - options["start_row"] + 1 # inclusive + if options["start_row"] and not options["end_row"]: + start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 + + if options["end_row"] and not options["start_row"]: + end_row = options["end_row"] + if options["limit"]: end_row = options["limit"] @@ -78,7 +84,18 @@ def process_csv_data( continue if cluster_id and citation_to_add: - add_citations_to_cluster([citation_to_add], cluster_id) + try: + add_citations_to_cluster([citation_to_add], cluster_id) + except Exception as e: + if "Field 'volume' expected" in str(e): + # Fail silently, we already know this issue + logger.info( + f"Row: {index} - Invalid volume in citation: {citation_to_add} for cluster: {cluster_id}" + ) + continue + else: + # Unknown issue + raise class Command(BaseCommand): @@ -114,15 +131,15 @@ def add_arguments(self, parser): def handle(self, *args, **options): if options["start_row"] and options["end_row"]: if 
options["start_row"] > options["end_row"]: - print("--start-row can't be greater than --end-row") + logger.info("--start-row can't be greater than --end-row") return if not os.path.exists(options["csv"]): - print(f"Csv file: {options['csv']} doesn't exist.") + logger.info(f"Csv file: {options['csv']} doesn't exist.") return data = load_citations_file(options) if not data.empty: process_csv_data(data) else: - print("CSV file empty") + logger.info("CSV file is empty or start/end row returned no rows.") diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py index c60696e795..e6551b9152 100644 --- a/cl/corpus_importer/utils.py +++ b/cl/corpus_importer/utils.py @@ -641,6 +641,9 @@ def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None: ).exists(): # Avoid adding a citation if we already have a citation from the # citation's reporter + logger.info( + f"We already have a citation from the reporter: {citation[0].corrected_reporter()} in the cluster id: {cluster_id}" + ) continue try: From 36528fdaf07f518b811fb2b9e0de8af2856e48d9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 15 Jul 2024 16:49:04 -0600 Subject: [PATCH 055/372] feat(scrape_pacer_free_opinions): optional start date when running from the beginning of time (1950) Update code to save html files --- .../commands/scrape_pacer_free_opinions.py | 9 +++- cl/corpus_importer/tasks.py | 43 ++++++++----------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index 7c0e0d2d52..4586f0c0dd 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -425,7 +425,12 @@ def do_all(options: OptionsType): :return: None """ - start = datetime.date(1950, 5, 1) + + start = ( + options["date_start"] + if options["date_start"] + else datetime.date(1950, 5, 1) + ) end = datetime.date.today() dates = make_date_range_tuples(start, end, gap=7) @@ -508,7 +513,7 @@ def handle(self, *args: List[str], **options: OptionsType) -> None: if options["date_start"] and options["date_end"]: if options["date_start"] > options["date_end"]: # type: ignore - print( + logger.info( "Error: date-end must be greater or equal than date-start option." ) return diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 697050fb99..c8bdc12a88 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -398,30 +398,25 @@ def get_and_save_free_document_report( if log_id: # We only save the html when the script is run automatically every day log = PACERFreeDocumentLog.objects.get(pk=log_id) - for result in report.responses: - if isinstance(result, dict): - response = result.get("response") - query_start = result.get("start") - query_end = result.get("end") - - if response and query_start and query_end: - pacer_file = PacerHtmlFiles( - content_object=log, - upload_type=UPLOAD_TYPE.FREE_OPINIONS_REPORT, - ) - pacer_file.filepath.save( - f"free_opinions_report_{court_id}_from_{query_start.replace('/', '-')}_to_{query_end.replace('/', '-')}.html", - ContentFile(response.text.encode()), - ) - else: - # FreeOpinionReport now returns a list of dicts with additional data - # instead of a list of requests responses. - # This is temporary while the new version of juriscraper is added to - # courtlistener - logger.info( - "New version of juriscraper not yet implemented. 
Can't "
-                    "save PacerHtmlFiles object."
-                )
 
     document_rows_to_create = []
     for row in results:

From 7c00aa1a012e50dd2b314fb48c4b7fa7ff07a02d Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Tue, 16 Jul 2024 12:49:38 -0600
Subject: [PATCH 056/372] feat(scrape_pacer_free_opinions): rename migrations

---
 ...pe_noop.py => 0015_alter_pacerhtmlfiles_upload_type_noop.py} | 2 +-
 ..._noop.sql => 0015_alter_pacerhtmlfiles_upload_type_noop.sql} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename cl/recap/migrations/{0014_alter_pacerhtmlfiles_upload_type_noop.py => 0015_alter_pacerhtmlfiles_upload_type_noop.py} (97%)
 rename cl/recap/migrations/{0014_alter_pacerhtmlfiles_upload_type_noop.sql => 0015_alter_pacerhtmlfiles_upload_type_noop.sql} (100%)

diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
similarity index 97%
rename from cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py
rename to cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
index a6b0c7ac66..9c3ea8d835 100644
--- a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py
+++ b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
@@ -5,7 +5,7 @@ class Migration(migrations.Migration):
 
     dependencies = [
-        ("recap", "0013_processingqueue_update"),
+        ("recap", "0014_add_acms_upload_type_noop"),
     ]
 
     operations = [

diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.sql
similarity index 100%
rename from cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql
rename to cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.sql

From 0b50d675c380a2b2d92e744a7779edf85adb636e Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Tue, 16 Jul 2024 13:05:31 -0600
Subject: [PATCH 057/372] feat(scrape_pacer_free_opinions): update migration

---
 .../0015_alter_pacerhtmlfiles_upload_type_noop.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
index 9c3ea8d835..f2c53368e6 100644
--- a/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
+++ b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
@@ -30,7 +30,8 @@ class Migration(migrations.Migration):
                     (14, "Case query result page"),
                     (15, "Appellate Case query result page"),
                     (16, "ACMS docket JSON object"),
-                    (17, "Free opinions report"),
+                    (17, "ACMS attachment page JSON object"),
+                    (18, "Free opinions report"),
                 ],
                 help_text="The type of object that is uploaded",
             ),
@@ -56,7 +57,8 @@ class
Migration(migrations.Migration):
                     (14, "Case query result page"),
                     (15, "Appellate Case query result page"),
                     (16, "ACMS docket JSON object"),
-                    (17, "Free opinions report"),
+                    (17, "ACMS attachment page JSON object"),
+                    (18, "Free opinions report"),
                 ],
                 help_text="The type of object that is uploaded",
            ),

From 9c819ceaedac4004f5329b81d544024c59c9d156 Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Wed, 17 Jul 2024 11:21:35 -0600
Subject: [PATCH 058/372] feat(scrape_pacer_free_opinions): remove previous
 changes, add a delay option to control time between citations

---
 .../commands/import_citations_csv.py | 28 ++++++++-----------
 cl/corpus_importer/utils.py          |  3 --
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py
index f23649eeb6..98bd88255a 100644
--- a/cl/citations/management/commands/import_citations_csv.py
+++ b/cl/citations/management/commands/import_citations_csv.py
@@ -15,6 +15,7 @@
 """
 
 import os.path
+import time
 
 import numpy as np
 import pandas as pd
@@ -64,12 +65,11 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader:
     return data
 
 
-def process_csv_data(
-    data: DataFrame | TextFileReader,
-) -> None:
+def process_csv_data(data: DataFrame | TextFileReader, options: dict) -> None:
     """Process citations from csv file
 
     :param data: rows from csv file
+    :param options: options passed to command
     :return: None
     """
 
@@ -84,18 +84,8 @@ def process_csv_data(
             continue
 
         if cluster_id and citation_to_add:
-            try:
-                add_citations_to_cluster([citation_to_add], cluster_id)
-            except Exception as e:
-                if "Field 'volume' expected" in str(e):
-                    # Fail silently, we already know this issue
-                    logger.info(
-                        f"Row: {index} - Invalid volume in citation: {citation_to_add} for cluster: {cluster_id}"
-                    )
-                    continue
-                else:
-                    # Unknown issue
-                    raise
+            add_citations_to_cluster([citation_to_add], cluster_id)
+            time.sleep(options["delay"])
 
 
 class Command(BaseCommand):
@@ -127,6 +117,12 @@ def add_arguments(self, parser):
             help="Limit number of rows to process.",
             required=False,
         )
+        parser.add_argument(
+            "--delay",
+            type=float,
+            default=1.0,
+            help="How long to wait to add each citation (in seconds, allows floating numbers).",
+        )
 
     def handle(self, *args, **options):
         if options["start_row"] and options["end_row"]:
@@ -140,6 +136,6 @@ def handle(self, *args, **options):
 
         data = load_citations_file(options)
         if not data.empty:
-            process_csv_data(data)
+            process_csv_data(data, options)
         else:
             logger.info("CSV file is empty or start/end row returned no rows.")

diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py
index e6551b9152..c60696e795 100644
--- a/cl/corpus_importer/utils.py
+++ b/cl/corpus_importer/utils.py
@@ -641,6 +641,9 @@ def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None:
         ).exists():
             # Avoid adding a citation if we already have a citation from the
             # citation's reporter
-            logger.info(
-                f"We already have a citation from the reporter: {citation[0].corrected_reporter()} in the cluster id: {cluster_id}"
-            )
             continue
 
         try:

From dc92cd64f4073cf8b3e3b076dbcf950d6a8edb40 Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Wed, 17 Jul 2024 12:29:36 -0600
Subject: [PATCH 059/372] feat(scrape_pacer_free_opinions): update function to
 pass delay and start and end rows

---
 .../commands/import_citations_csv.py | 45 ++++++++-----------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git
a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py index 98bd88255a..c559961995 100644 --- a/cl/citations/management/commands/import_citations_csv.py +++ b/cl/citations/management/commands/import_citations_csv.py @@ -35,27 +35,20 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: :return: loaded data """ - start_row = None end_row = None - if options["start_row"] and options["end_row"]: - start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 - end_row = options["end_row"] - options["start_row"] + 1 # inclusive - - if options["start_row"] and not options["end_row"]: - start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 - - if options["end_row"] and not options["start_row"]: - end_row = options["end_row"] - - if options["limit"]: - end_row = options["limit"] + if options["end_row"] or options["limit"]: + end_row = ( + options["limit"] + if options["limit"] > options["end_row"] + else options["end_row"] + ) data = pd.read_csv( options["csv"], names=["cluster_id", "citation_to_add"], delimiter=",", - skiprows=start_row, + skiprows=options["start_row"] - 1 if options["start_row"] else None, nrows=end_row, ) @@ -65,11 +58,11 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: return data -def process_csv_data(data: DataFrame | TextFileReader, options: dict) -> None: +def process_csv_data(data: DataFrame | TextFileReader, delay_s: float) -> None: """Process citations from csv file :param data: rows from csv file - :param options: options passed to command + :param delay_s: how long to wait to add each citation :return: None """ @@ -78,14 +71,12 @@ def process_csv_data(data: DataFrame | TextFileReader, options: dict) -> None: citation_to_add = row.get("citation_to_add") if not OpinionCluster.objects.filter(id=cluster_id).exists(): - logger.info( - f"Row: {index} - Opinion cluster doesn't exist: {cluster_id}" - ) + logger.info(f"Opinion cluster doesn't exist: {cluster_id}") continue if cluster_id and citation_to_add: add_citations_to_cluster([citation_to_add], cluster_id) - time.sleep(options["delay"]) + time.sleep(delay_s) class Command(BaseCommand): @@ -102,11 +93,13 @@ def add_arguments(self, parser): ) parser.add_argument( "--start-row", + default=0, type=int, help="Start row (inclusive).", ) parser.add_argument( "--end-row", + default=0, type=int, help="End row (inclusive).", ) @@ -121,14 +114,14 @@ def add_arguments(self, parser): "--delay", type=float, default=1.0, - help="How long to wait to add each citation (in seconds, allows floating numbers).", + help="How long to wait to add each citation (in seconds, allows floating " + "numbers).", ) def handle(self, *args, **options): - if options["start_row"] and options["end_row"]: - if options["start_row"] > options["end_row"]: - logger.info("--start-row can't be greater than --end-row") - return + if options["end_row"] and options["start_row"] > options["end_row"]: + logger.info("--start-row can't be greater than --end-row") + return if not os.path.exists(options["csv"]): logger.info(f"Csv file: {options['csv']} doesn't exist.") @@ -136,6 +129,6 @@ def handle(self, *args, **options): data = load_citations_file(options) if not data.empty: - process_csv_data(data, options) + process_csv_data(data, options["delay"]) else: logger.info("CSV file is empty or start/end row returned no rows.") From 2d690b87cdf2f0016c6d6a43d7d23d2fe4ec5de8 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Wed, 17 Jul 
2024 23:34:45 -0500
Subject: [PATCH 060/372] fix(make_objects, save_everything): ingest new
 fields from juriscraper

Also, generalize overwriting prevention in `update_or_create_docket`

Remove incorrect assignment of "summaries" to OpinionCluster.syllabus

save_everything now:
- gets the Person object from Opinion.author_str to put in Opinion.author
- does not assign OpinionCluster.judges to Opinion.author when
  Opinion.author_str has an explicit value
---
 .../management/commands/cl_scrape_opinions.py | 39 +++++++++++++++----
 cl/scrapers/utils.py                          | 23 +++++------
 2 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/cl/scrapers/management/commands/cl_scrape_opinions.py b/cl/scrapers/management/commands/cl_scrape_opinions.py
index a9bf3d8b2b..aa953cb946 100644
--- a/cl/scrapers/management/commands/cl_scrape_opinions.py
+++ b/cl/scrapers/management/commands/cl_scrape_opinions.py
@@ -83,6 +83,15 @@ def make_objects(
 ) -> Tuple[Docket, Opinion, OpinionCluster, List[Citation]]:
     """Takes the meta data from the scraper and associates it with objects.
 
+    The keys returned by juriscraper scrapers are defined by `self._all_attrs`
+    on OpinionSite and OralArgumentSite, where the legacy convention is to use
+    plural names.
+
+    However, this function is also used by importers and user pages, which
+    may not respect this convention; hence the duplication of singular and
+    plural names, like in
+    `item.get("disposition") or item.get("dispositions", "")`
+
     Returns the created objects.
     """
     blocked = item["blocked_statuses"]
@@ -106,20 +115,27 @@ def make_objects(
         appeal_from_str=item.get("lower_courts", ""),
     )
 
+    # Note that if opinion.author_str has no value, and cluster.judges finds
+    # a single judge, opinion.author will be populated with that Person
+    # object. See `save_everything`
+
+    # For a discussion on syllabus vs summary, check
+    # https://github.com/freelawproject/juriscraper/issues/66
     cluster = OpinionCluster(
-        judges=item.get("judges", ""),
         date_filed=item["case_dates"],
         date_filed_is_approximate=item["date_filed_is_approximate"],
         case_name=item["case_names"],
         case_name_short=case_name_short,
         source=item.get("cluster_source") or SOURCES.COURT_WEBSITE,
         precedential_status=item["precedential_statuses"],
-        nature_of_suit=item.get("nature_of_suit", ""),
-        summary=item.get("summary", ""),
         blocked=blocked,
         date_blocked=date_blocked,
+        judges=item.get("judges", ""),
+        nature_of_suit=item.get("nature_of_suit", ""),
+        disposition=item.get("disposition") or item.get("dispositions", ""),
+        other_dates=item.get("other_dates", ""),
+        summary=item.get("summary", ""),
         syllabus=item.get("summaries", ""),
-        disposition=item.get("disposition") or item.get("dispositions", "")
     )
 
     cites = [item.get(key, "") for key in ["citations", "parallel_citations"]]
@@ -139,7 +155,7 @@ def make_objects(
         download_url=url,
         joined_by_str=item.get("joined_by", ""),
         per_curiam=item.get("per_curiam", False),
-        author_str=item.get("author_str") or item.get("authors", "")
+        author_str=item.get("author_str") or item.get("authors", ""),
     )
 
     cf = ContentFile(content)
@@ -168,14 +184,21 @@ def save_everything(
         citation.cluster_id = cluster.pk
         citation.save()
 
+    if opinion.author_str:
+        candidate = async_to_sync(lookup_judges_by_messy_str)(
+            opinion.author_str, docket.court.pk, cluster.date_filed
+        )
+        if candidate:
+            opinion.author = candidate[0]
+
     if cluster.judges:
         candidate_judges = async_to_sync(lookup_judges_by_messy_str)(
             cluster.judges, docket.court.pk, cluster.date_filed
        )
 
-        if len(candidate_judges) == 1:
-            opinion.author =
candidate_judges[0] - if len(candidate_judges) > 1: + if len(candidate_judges) == 1 and not opinion.author_str: + opinion.author = candidate_judges[0] + elif len(candidate_judges) > 1: for candidate in candidate_judges: cluster.panel.add(candidate) diff --git a/cl/scrapers/utils.py b/cl/scrapers/utils.py index 4355e3bc7b..f5d5177436 100644 --- a/cl/scrapers/utils.py +++ b/cl/scrapers/utils.py @@ -323,6 +323,7 @@ def update_or_create_docket( "ia_needs_upload": ia_needs_upload, "appeal_from_str": appeal_from_str, "date_blocked": date_blocked, + "date_argued": date_argued, } docket = async_to_sync(find_docket_object)(court_id, None, docket_number) @@ -330,25 +331,25 @@ def update_or_create_docket( # Update the existing docket with the new values docket.add_opinions_source(source) - # Prevent overwriting Docket.date_argued if it exists - if date_argued: - if docket.date_argued and date_argued != docket.date_argued: + for field, value in docket_fields.items(): + if not value: + continue + if getattr(docket, field) and getattr(docket, field) != value: + # Prevent overwriting values that already exist, since default values + # to this function are empty strings or None logger.error( - "Docket %s already has a date_argued %s, different than new date %s", + "Docket %s already has a %s %s, different than new value %s", docket.pk, - docket.date_argued, - date_argued, + field, + getattr(docket, field), + value, ) else: - docket.date_argued = date_argued - - for field, value in docket_fields.items(): - setattr(docket, field, value) + setattr(docket, field, value) else: # Create a new docket with docket_fields and additional fields docket = Docket( **docket_fields, - date_argued=date_argued, source=source, docket_number=docket_number, court_id=court_id, From 45e48b6f31eab03a69522e088fa7178c6a6dd2e9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 18 Jul 2024 20:44:03 -0600 Subject: [PATCH 061/372] feat(import_citations_csv): implement reindex argument for command improve the code of the command in general refactor add_citations_to_cluster function --- .../commands/import_citations_csv.py | 54 +++++++++++++---- cl/corpus_importer/utils.py | 60 ++++++++++++------- 2 files changed, 81 insertions(+), 33 deletions(-) diff --git a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py index c559961995..5451a6e67a 100644 --- a/cl/citations/management/commands/import_citations_csv.py +++ b/cl/citations/management/commands/import_citations_csv.py @@ -10,14 +10,20 @@ How to run the command: manage.py import_citations_csv --csv /opt/courtlistener/cl/assets/media/wl_citations_1.csv +# Add all citations from the file and reindex existing ones +manage.py import_citations_csv --csv /opt/courtlistener/cl/assets/media/wl_citations_1.csv --reindex + +# Add and index all citations from the file starting from row 2600000 and reindex existing ones +manage.py import_citations_csv --csv /opt/courtlistener/cl/assets/media/x.csv --start-row 2600000 --delay 0.1 + Note: If --limit is greater than --end-row, end row will be ignored """ +import argparse import os.path import time -import numpy as np import pandas as pd from django.core.management import BaseCommand from pandas import DataFrame @@ -37,6 +43,8 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: end_row = None + dtype_mapping = {"cluster_id": "int", "citation_to_add": "str"} + if options["end_row"] or options["limit"]: end_row = ( options["limit"] @@ -47,27 +55,30 @@ def 
load_citations_file(options: dict) -> DataFrame | TextFileReader: data = pd.read_csv( options["csv"], names=["cluster_id", "citation_to_add"], + dtype=dtype_mapping, delimiter=",", skiprows=options["start_row"] - 1 if options["start_row"] else None, nrows=end_row, + na_filter=False, ) - # Replace nan in dataframe - data = data.replace(np.nan, "", regex=True) logger.info(f"Found {len(data.index)} rows in csv file: {options['csv']}") return data -def process_csv_data(data: DataFrame | TextFileReader, delay_s: float) -> None: +def process_csv_data( + data: DataFrame | TextFileReader, delay_s: float, reindex: bool +) -> None: """Process citations from csv file :param data: rows from csv file :param delay_s: how long to wait to add each citation + :param reindex: force reindex of citations :return: None """ for index, row in data.iterrows(): - cluster_id = int(row.get("cluster_id")) + cluster_id = row.get("cluster_id") citation_to_add = row.get("citation_to_add") if not OpinionCluster.objects.filter(id=cluster_id).exists(): @@ -75,7 +86,7 @@ def process_csv_data(data: DataFrame | TextFileReader, delay_s: float) -> None: continue if cluster_id and citation_to_add: - add_citations_to_cluster([citation_to_add], cluster_id) + add_citations_to_cluster([citation_to_add], cluster_id, reindex) time.sleep(delay_s) @@ -85,9 +96,22 @@ class Command(BaseCommand): def __init__(self, *args, **kwargs): super(Command, self).__init__(*args, **kwargs) + def existing_path_type(self, path: str): + """Validate file path exists + + :param path: path to validate + :return: valid path + """ + if not os.path.exists(path): + raise argparse.ArgumentTypeError( + f"Csv file: {path} doesn't exist." + ) + return path + def add_arguments(self, parser): parser.add_argument( "--csv", + type=self.existing_path_type, help="Absolute path to a CSV file containing the citations to add.", required=True, ) @@ -117,18 +141,22 @@ def add_arguments(self, parser): help="How long to wait to add each citation (in seconds, allows floating " "numbers).", ) + parser.add_argument( + "--reindex", + action="store_true", + default=False, + help="Reindex citations if they are already in the system", + ) def handle(self, *args, **options): if options["end_row"] and options["start_row"] > options["end_row"]: logger.info("--start-row can't be greater than --end-row") return - if not os.path.exists(options["csv"]): - logger.info(f"Csv file: {options['csv']} doesn't exist.") - return - data = load_citations_file(options) - if not data.empty: - process_csv_data(data, options["delay"]) - else: + + if data.empty: logger.info("CSV file is empty or start/end row returned no rows.") + return + + process_csv_data(data, options["delay"], options["reindex"]) diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py index c60696e795..3e032d08dc 100644 --- a/cl/corpus_importer/utils.py +++ b/cl/corpus_importer/utils.py @@ -612,11 +612,14 @@ def merge_overlapping_data( return data_to_update -def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None: +def add_citations_to_cluster( + cites: list[str], cluster_id: int, save_again_if_exists: bool = False +) -> None: """Add string citations to OpinionCluster if it has not yet been added :param cites: citation list :param cluster_id: cluster id related to citations + :param save_again_if_exists: force save citation if it already exists :return: None """ for cite in cites: @@ -636,29 +639,46 @@ def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None: cite_type_str = 
citation[0].all_editions[0].reporter.cite_type reporter_type = map_reporter_db_cite_type(cite_type_str) - if Citation.objects.filter( - cluster_id=cluster_id, reporter=citation[0].corrected_reporter() - ).exists(): - # Avoid adding a citation if we already have a citation from the - # citation's reporter - continue - - try: - o, created = Citation.objects.get_or_create( - volume=citation[0].groups["volume"], - reporter=citation[0].corrected_reporter(), - page=citation[0].groups["page"], - type=reporter_type, + citation_params = { + "volume": citation[0].groups["volume"], + "reporter": citation[0].corrected_reporter(), + "page": citation[0].groups["page"], + "type": reporter_type, + "cluster_id": cluster_id, + } + citation_obj = Citation.objects.filter(**citation_params).first() + if citation_obj: + if save_again_if_exists: + # We already have the citation for the cluster and want to reindex it + citation_obj.save() + logger.info( + f"Reindexing: {cite} added to cluster id: {cluster_id}" + ) + else: + # Ignore and go to the next citation in the list + continue + else: + if Citation.objects.filter( cluster_id=cluster_id, - ) - if created: + reporter=citation[0].corrected_reporter(), + ).exists(): + # Avoid adding a citation if we already have a citation from the + # citation's reporter. + logger.info( + f"Can't add: {cite} to cluster id: {cluster_id}. There is already " + f"a citation from that reporter." + ) + continue + try: + # We don't have the citation or any citation from the reporter + Citation.objects.create(**citation_params) logger.info( f"New citation: {cite} added to cluster id: {cluster_id}" ) - except IntegrityError: - logger.warning( - f"Reporter mismatch for cluster: {cluster_id} on cite: {cite}" - ) + except IntegrityError: + logger.warning( + f"Reporter mismatch for cluster: {cluster_id} on cite: {cite}" + ) def update_cluster_panel( From 594737e11c99561e025578f1033e96c8a7bfa40d Mon Sep 17 00:00:00 2001 From: Jason Hopper Date: Fri, 19 Jul 2024 11:07:35 -0300 Subject: [PATCH 062/372] adding missing tables, setting import order to avoid foreign key constraint violations --- scripts/make_bulk_data.sh | 179 +++++++++++++++++++++++++++++++------- 1 file changed, 149 insertions(+), 30 deletions(-) diff --git a/scripts/make_bulk_data.sh b/scripts/make_bulk_data.sh index 3a4a28f58e..56e0844a00 100755 --- a/scripts/make_bulk_data.sh +++ b/scripts/make_bulk_data.sh @@ -184,6 +184,10 @@ politicalaffiliation_fields='( )' politicalaffiliation_csv_filename="people-db-political-affiliations-$(date -I).csv" +# people_db_race +people_db_race_fields='(id, race)' +people_db_race_csv_filename="people_db_race-$(date -I).csv" + # people_db_person_race people_db_person_race_fields='( id, person_id, race_id @@ -258,38 +262,151 @@ disclosures_debt_fields='( )' disclosures_debt_csv_filename="financial-disclosures-debts-$(date -I).csv" + +people_db_attorneyorganization_fields='( + id, date_created, date_modified, lookup_key, name, address1, address2, city, state, zip_code + )' +people_db_attorneyorganization_csv_filename="people_db_attorneyorganization-$(date -I).csv" + + +people_db_attorney_fields='( + id, date_created, date_modified, name, contact_raw, phone, fax, email + )' +people_db_attorney_csv_filename="people_db_attorney-$(date -I).csv" + + +people_db_party_fields='( + id, date_created, date_modified, name, extra_info + )' +people_db_party_csv_filename="people_db_party-$(date -I).csv" + + +docket_fields='( + id, date_created, date_modified, date_cert_granted, date_cert_denied, date_argued, + 
date_reargued, date_reargument_denied, case_name_short, case_name, case_name_full, slug, + docket_number, blocked, court_id, assigned_to_id, cause, date_filed, date_list_filing, + date_terminated, filepath_ia, filepath_local, jurisdiction_type, jury_demand, + nature_of_suit, pacer_case_id, referred_to_id, source, assigned_to_str, view_count, + date_last_index, appeal_from_str, appellate_case_type_information, + appellate_fee_status, panel_str, originating_court_information_id, mdl_status, + filepath_ia_json, ia_date_first_change, ia_needs_upload, ia_upload_failure_count, + docket_number_core, idb_data_id + )' +dockets_csv_filename="search_docket-$(date -I).csv" + + + +people_db_partytype_fields='( + id, name, docket_id, party_id, date_terminated, extra_info, + highest_offense_level_opening, highest_offense_level_terminated + )' +people_db_partytype_csv_filename="people_db_partytype-$(date -I).csv" + + +fjcintegrateddatabase_fields='( + id, dataset_source, date_created, date_modified, office, docket_number, origin, date_filed, + jurisdiction, nature_of_suit, title, section, subsection, diversity_of_residence, class_action, + monetary_demand, county_of_residence, arbitrarion_at_filing, arbitration_at_termination, + multidistrict_litigation_docket_number, plaintiff, defendant, date_transfer, transfer_office, + transfer_docket_number, transfer_oprigin, date_terminated, termination_class_action_status, + procedural_progress, disposition, nature_of_judgement, amount_recieved, judgment, pro_se, + year_of_tape, circuit_id, district_id, nature_of_offense, version + )' +fjcintegrateddatabase_csv_filename="recap_fjcintegrateddatabase-$(date -I).csv" + + +people_db_criminalcount_fields='( + id, date_created, date_modified, creditor_name, description, + value_code, redacted, financial_disclosure_id + )' +people_db_criminalcount_csv_filename="people_db_criminalcount-$(date -I).csv" + + +people_db_criminalcomplaint_fields='( + id, name, disposition, status, party_type_id + )' +people_db_criminalcomplaint_csv_filename="people_db_criminalcomplaint-$(date -I).csv" + + +people_db_role_fields='( + id, role, date_action, attorney_id, docket_id, party_id, role_raw + )' +people_db_role_csv_filename="people_db_role-$(date -I).csv" + + +people_db_attorneyorganizationassociation_fields='( + id, attorney_id, attorney_organization_id, docket_id + )' +people_db_attorneyorganizationassociation_csv_filename="people_db_attorneyorganizationassociation-$(date -I).csv" + + +search_docketentry_fields='( + id, date_created_date_modified, date_filed, entry_number, description, docket_id, + pacer_sequence_number, recap_sequence_number + )' +search_docketentry_csv_filename="search_docketentry-$(date -I).csv" + + +search_opinioncluster_panel_fields='( + id, opinioncluster_id, person_id + )' +search_opinioncluster_panel_csv_filename="search_opinioncluster_panel-$(date -I).csv" + + +search_opinioncluster_non_participating_judges_fields='( + id, opinioncluster_id, person_id + )' +search_opinioncluster_non_participating_judges_csv_filename="search_opinioncluster_non_participating_judges-$(date -I).csv" + # If you add or remove a table, you need to update this number -NUM_TABLES=28 +NUM_TABLES=42 # Every new table added to bulk script should be added as an associative array -declare -a t_1=("search_court" "$court_fields" "$court_csv_filename") -declare -a t_2=("search_courthouse" "$courthouse_fields" "$courthouse_csv_filename") -declare -a t_3=("search_court_appeals_to" "$court_appeals_to_fields" "$court_appeals_to_csv_filename") 
-declare -a t_4=("search_docket" "$docket_fields" "$dockets_csv_filename") -declare -a t_5=("search_originatingcourtinformation" "$originatingcourtinformation_fields" "$originatingcourtinformation_csv_filename") +# This ordering is important. Tables with foreign key constraints must be loaded in order. +declare -a t_1=("people_db_person" "$people_db_person_fields" "$people_db_person_csv_filename") +declare -a t_2=("people_db_race" "$people_db_race_fields" "$people_db_race_csv_filename") +declare -a t_3=("people_db_school" "$people_db_school_fields" "$people_db_school_csv_filename") +declare -a t_4=("search_court" "$court_fields" "$court_csv_filename") +declare -a t_5=("people_db_position" "$people_db_position_fields" "$people_db_position_csv_filename") declare -a t_6=("recap_fjcintegrateddatabase" "$fjcintegrateddatabase_fields" "$fjcintegrateddatabase_csv_filename") -declare -a t_7=("search_opinioncluster" "$opinioncluster_fields" "$opinioncluster_csv_filename") -declare -a t_8=("search_opinion" "$opinion_fields" "$opinions_csv_filename") -declare -a t_9=("search_opinionscited" "$opinionscited_fields" "$opinionscited_csv_filename") -declare -a t_10=("search_citation" "$citation_fields" "$citations_csv_filename") -declare -a t_11=("search_parenthetical" "$parentheticals_fields" "$parentheticals_csv_filename") -declare -a t_12=("audio_audio" "$oralarguments_fields" "$oralarguments_csv_filename") -declare -a t_13=("people_db_person" "$people_db_person_fields" "$people_db_person_csv_filename") -declare -a t_14=("people_db_school" "$people_db_school_fields" "$people_db_school_csv_filename") -declare -a t_15=("people_db_position" "$people_db_position_fields" "$people_db_position_csv_filename") -declare -a t_16=("people_db_retentionevent" "$people_db_retentionevent_fields" "$people_db_retentionevent_csv_filename") -declare -a t_17=("people_db_education" "$people_db_education_fields" "$people_db_education_csv_filename") -declare -a t_18=("people_db_politicalaffiliation" "$politicalaffiliation_fields" "$politicalaffiliation_csv_filename") -declare -a t_19=("people_db_person_race" "$people_db_person_race_fields" "$people_db_person_race_csv_filename") -declare -a t_20=("disclosures_financialdisclosure" "$financialdisclosure_fields" "$financialdisclosure_csv_filename") -declare -a t_21=("disclosures_investment" "$investment_fields" "$investment_csv_filename") -declare -a t_22=("disclosures_position" "$disclosures_position_fields" "$disclosures_position_csv_filename") -declare -a t_23=("disclosures_agreement" "$disclosures_agreement_fields" "$disclosures_agreement_csv_filename") -declare -a t_24=("disclosures_noninvestmentincome" "$noninvestmentincome_fields" "$noninvestmentincome_csv_filename") -declare -a t_25=("disclosures_spouseincome" "$spouseincome_fields" "$spouseincome_csv_filename") -declare -a t_26=("disclosures_reimbursement" "$disclosures_reimbursement_fields" "$disclosures_reimbursement_csv_filename") -declare -a t_27=("disclosures_gift" "$disclosures_gift_fields" "$disclosures_gift_csv_filename") -declare -a t_28=("disclosures_debt" "$disclosures_debt_fields" "$disclosures_debt_csv_filename") +declare -a t_7=("search_originatingcourtinformation" "$originatingcourtinformation_fields" "$originatingcourtinformation_csv_filename") + +declare -a t_8=("people_db_attorneyorganization" "$people_db_attorneyorganization_fields" "$people_db_attorneyorganization_csv_filename") +declare -a t_9=("people_db_attorney" "$people_db_attorney_fields" "$people_db_attorney_csv_filename") +declare -a 
t_10=("people_db_party" "$people_db_party_fields" "$people_db_party_csv_filename") +declare -a t_11=("search_docket" "$docket_fields" "$dockets_csv_filename") +declare -a t_12=("search_opinioncluster" "$opinioncluster_fields" "$opinioncluster_csv_filename") +declare -a t_13=("people_db_partytype" "$people_db_partytype_fields" "$people_db_partytype_csv_filename") +declare -a t_14=("recap_fjcintegrateddatabase" "$fjcintegrateddatabase_fields" "$fjcintegrateddatabase_csv_filename") +declare -a t_15=("people_db_criminalcount" "$people_db_criminalcount_fields" "$people_db_criminalcount_csv_filename") +declare -a t_16=("people_db_criminalcomplaint" "$people_db_criminalcomplaint_fields" "$people_db_criminalcomplaint_csv_filename") +declare -a t_17=("people_db_role" "$people_db_role_fields" "$people_db_role_csv_filename") +declare -a t_18=("people_db_attorneyorganizationassociation" "$people_db_attorneyorganizationassociation_fields" "$people_db_attorneyorganizationassociation_csv_filename") +declare -a t_19=("search_docketentry" "$search_docketentry_fields" "$search_docketentry_csv_filename") +declare -a t_20=("search_opinioncluster_panel" "$search_opinioncluster_panel_fields" "$search_opinioncluster_panel_csv_filename") +declare -a t_21=("search_opinioncluster_non_participating_judges" "$search_opinioncluster_non_participating_judges_fields" "$search_opinioncluster_non_participating_judges_csv_filename") + +declare -a t_22=("search_opinion" "$opinion_fields" "$opinions_csv_filename") +declare -a t_23=("search_opinion_joined_by" "$search_opinion_joined_by_fields" "$search_opinion_joined_by_csv_filename") +declare -a t_24=("search_courthouse" "$courthouse_fields" "$courthouse_csv_filename") +declare -a t_25=("search_court_appeals_to" "$court_appeals_to_fields" "$court_appeals_to_csv_filename") +declare -a t_26=("search_opinionscited" "$opinionscited_fields" "$opinionscited_csv_filename") +declare -a t_27=("search_citation" "$citation_fields" "$citations_csv_filename") +declare -a t_28=("search_parenthetical" "$parentheticals_fields" "$parentheticals_csv_filename") +declare -a t_29=("audio_audio" "$oralarguments_fields" "$oralarguments_csv_filename") +declare -a t_30=("people_db_retentionevent" "$people_db_retentionevent_fields" "$people_db_retentionevent_csv_filename") +declare -a t_31=("people_db_education" "$people_db_education_fields" "$people_db_education_csv_filename") +declare -a t_32=("people_db_politicalaffiliation" "$politicalaffiliation_fields" "$politicalaffiliation_csv_filename") +declare -a t_33=("people_db_person_race" "$people_db_person_race_fields" "$people_db_person_race_csv_filename") +declare -a t_34=("disclosures_financialdisclosure" "$financialdisclosure_fields" "$financialdisclosure_csv_filename") +declare -a t_35=("disclosures_investment" "$investment_fields" "$investment_csv_filename") +declare -a t_36=("disclosures_position" "$disclosures_position_fields" "$disclosures_position_csv_filename") +declare -a t_37=("disclosures_agreement" "$disclosures_agreement_fields" "$disclosures_agreement_csv_filename") +declare -a t_38=("disclosures_noninvestmentincome" "$noninvestmentincome_fields" "$noninvestmentincome_csv_filename") +declare -a t_39=("disclosures_spouseincome" "$spouseincome_fields" "$spouseincome_csv_filename") +declare -a t_40=("disclosures_reimbursement" "$disclosures_reimbursement_fields" "$disclosures_reimbursement_csv_filename") +declare -a t_41=("disclosures_gift" "$disclosures_gift_fields" "$disclosures_gift_csv_filename") +declare -a t_42=("disclosures_debt" 
"$disclosures_debt_fields" "$disclosures_debt_csv_filename") # Create a new array with the data of each associative array declare -a listOfLists @@ -305,7 +422,7 @@ echo "Streaming ${lst[0]} to S3" psql \ --command \ "set statement_timeout to 0; - COPY ${lst[0]} ${lst[1]} TO STDOUT WITH (FORMAT csv, ENCODING utf8, HEADER, FORCE_QUOTE *)" \ + COPY ${lst[0]} ${lst[1]} TO STDOUT WITH (FORMAT csv, ENCODING utf8, HEADER, QUOTE '`', FORCE_QUOTE *)" \ --quiet \ --host "$DB_HOST" \ --username "$DB_USER" \ @@ -324,6 +441,8 @@ pg_dump \ --table 'search_*' \ --table 'people_db_*' \ --table 'audio_*' \ + --table 'recap_*' \ + --table 'disclosures_*' \ --no-privileges \ --no-publications \ --no-subscriptions courtlistener | \ @@ -384,7 +503,7 @@ declare -a lst="$group" cat >> "$OUT" <<- EOF echo "Loading ${lst[2]} to database" psql --command \ -"COPY public.${lst[0]} ${lst[1]} FROM '\$BULK_DIR/${lst[2]}' WITH (FORMAT csv, ENCODING utf8, HEADER)" \ +"COPY public.${lst[0]} ${lst[1]} FROM '\$BULK_DIR/${lst[2]}' WITH (FORMAT csv, ENCODING utf8, QUOTE '`', HEADER)" \ --host "\$BULK_DB_HOST" \ --username "\$BULK_DB_USER" \ --dbname "\$BULK_DB_NAME" From 9d502c79a737f3d4b3deb7d62592d850590c033f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:12:26 +0000 Subject: [PATCH 063/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/make_bulk_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/make_bulk_data.sh b/scripts/make_bulk_data.sh index 56e0844a00..24818761e4 100755 --- a/scripts/make_bulk_data.sh +++ b/scripts/make_bulk_data.sh @@ -306,7 +306,7 @@ people_db_partytype_csv_filename="people_db_partytype-$(date -I).csv" fjcintegrateddatabase_fields='( id, dataset_source, date_created, date_modified, office, docket_number, origin, date_filed, jurisdiction, nature_of_suit, title, section, subsection, diversity_of_residence, class_action, - monetary_demand, county_of_residence, arbitrarion_at_filing, arbitration_at_termination, + monetary_demand, county_of_residence, arbitrarion_at_filing, arbitration_at_termination, multidistrict_litigation_docket_number, plaintiff, defendant, date_transfer, transfer_office, transfer_docket_number, transfer_oprigin, date_terminated, termination_class_action_status, procedural_progress, disposition, nature_of_judgement, amount_recieved, judgment, pro_se, From d7132ec90bf778ae9f28855b31724b4a21bee33e Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 19 Jul 2024 10:57:20 -0600 Subject: [PATCH 064/372] fix(opinion_order): update poetry.lock --- poetry.lock | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index f7f6c67e40..4d48c0c2ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1116,6 +1116,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5461,4 +5472,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "e6d34875888f1687912d03d33ea68038bba6c6d487037c6454d5b18449ec6d0c" +content-hash = "5334f16d006f7486a5f9b905906f2a9a68e7f524684c04af3d0994ebd0999384" From 2013633d8c5b87d6fc7dfc4dfc4c701ad0fb18c2 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 19 Jul 2024 12:22:47 -0600 Subject: [PATCH 065/372] refactor(update_opinions_order): refactor code --- .../commands/update_opinions_order.py | 425 +----------------- 1 file changed, 20 insertions(+), 405 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 5b86c98130..85ed93e0e2 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,15 +1,20 @@ import os.path import re -from typing import Any, Optional +from typing import Optional -from bs4 import BeautifulSoup, NavigableString, Tag +from bs4 import BeautifulSoup from django.core.management import BaseCommand from django.db import transaction from django.db.models import Count -from cl.corpus_importer.utils import compare_documents, similarity_scores +from cl.corpus_importer.import_columbia.columbia_utils import ( + extract_columbia_opinions, + map_opinion_types, + process_extracted_opinions, + read_xml_to_soup, +) +from cl.corpus_importer.utils import EmptyOpinionException, match_opinion_lists from cl.lib.command_utils import logger -from cl.lib.string_diff import get_cosine_similarity from cl.search.models import SOURCES, Opinion, OpinionCluster VALID_COLUMBIA_SOURCES = [ @@ -23,393 +28,6 @@ ] -# TODO remove the funcitions below and import them from utils.py and columbia_utils.py when those changes get merged - -SIMPLE_TAGS = [ - "attorneys", - "caption", - "citation", - "court", - "date", - "docket", - "hearing_date", - "panel", - "posture", - "reporter_caption", -] - - -class EmptyOpinionException(Exception): - """An exception for opinions that raise a ZeroDivisionError Exception due empty - opinion tag or empty opinion content in cl""" - - def __init__(self, message: str) -> None: - self.message = message - - -def read_xml_to_soup(filepath: str) -> BeautifulSoup: - """This function reads the xml file, fixes the bad tags in columbia xml - files and returns a BeautifulSoup object - - :param filepath: path to xml file - :return: BeautifulSoup object of parsed content - """ - with open(filepath, "r", encoding="utf-8") as f: - file_content = f.read() - # Sometimes opening and ending tag mismatch (e.g. 
ed7c6b39dcb29c9c.xml) - file_content = file_content.replace( - "", "" - ) - # Fix opinion with invalid attribute - if "" in file_content: - file_content = file_content.replace( - "", "" - ) - file_content = file_content.replace("", "").replace( - "", "" - ) - return BeautifulSoup(file_content, "lxml") - - -def add_floating_opinion( - opinions: list, floating_content: list, opinion_order: int -) -> list: - """We have found floating opinions in bs object, we keep the opinion - content as a new opinion - - :param opinions: a list with opinions found - :param floating_content: content that is not in known non-opinion tags - :param opinion_order: opinion position - :return: updated list of opinions - """ - op_type = "opinion" - if opinions: - if opinions[-1].get("type"): - # Use type of previous opinion if exists - op_type = opinions[-1].get("type") - - # Get rid of double spaces from floating content - opinion_content = re.sub( - " +", " ", "\n".join(floating_content) - ).strip() # type: str - if opinion_content: - opinions.append( - { - "opinion": opinion_content, - "order": opinion_order, - "byline": "", - "type": op_type, - } - ) - return opinions - - -def extract_columbia_opinions( - outer_opinion: BeautifulSoup, -) -> list[Optional[dict]]: - """We extract all possible opinions from BeautifulSoup, with and without - author, and we create new opinions if floating content exists(content that - is not explicitly defined within an opinion tag or doesn't have an author) - - :param outer_opinion: element containing all xml tags - :return: list of opinion dicts - """ - opinions: list = [] - floating_content = [] - order = 0 - - # We iterate all content to look for all possible opinions - for i, content in enumerate(outer_opinion): # type: int, Tag - if isinstance(content, NavigableString): - # We found a raw string, store it - floating_content.append(str(content)) - else: - if content.name in SIMPLE_TAGS + [ - "citation_line", - "opinion_byline", - "dissent_byline", - "concurrence_byline", - ]: - # Ignore these tags, it will be processed later - continue - elif content.name in [ - "opinion_text", - "dissent_text", - "concurrence_text", - ]: - if floating_content: - # We have found an opinion, but there is floating - # content, we create a dict with the opinion using the - # floating content with default type = "opinion" - opinions = add_floating_opinion( - opinions, floating_content, order - ) - floating_content = [] - - byline = content.find_previous_sibling() - opinion_author = "" - if byline and "_byline" in byline.name: - opinion_author = byline.get_text() - - opinion_content = re.sub( - " +", " ", content.decode_contents() - ).strip() - if opinion_content: - # Now we create a dict with current opinion - opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": opinion_author, - "type": content.name.replace("_text", ""), - } - ) - order = order + 1 - - else: - if content.name not in SIMPLE_TAGS + ["syllabus"]: - # We store content that is not inside _text tag and is - # not in one of the known non-opinion tags - floating_content.append(str(content)) - - # Combine the new content into another opinion. great. 
- if floating_content: - # If we end to go through all the found opinions and if we still - # have floating content out there, we create a new opinion with the - # last type of opinion - opinions = add_floating_opinion(opinions, floating_content, order) - return opinions - - -def is_per_curiam_opinion( - content: Optional[str], byline: Optional[str] -) -> bool: - """Check if opinion author is per curiam - :param content: opinion content - :param byline: opinion text author - :return: True if opinion author is per curiam - """ - if byline and "per curiam" in byline[:1000].lower(): - return True - if content and "per curiam" in content[:1000].lower(): - return True - return False - - -def merge_opinions( - opinions: list, content: list, current_order: int -) -> tuple[list, int]: - """Merge last and previous opinion if are the same type or create a new - opinion if merge is not possible - - :param opinions: list of opinions that is being updated constantly - :param content: list of opinions without an author - :param current_order: opinion position - :return: updated list of opinions - """ - - # We check if the previous stored opinion matches the type of the - # content, and we store the opinion dict temporary - relevant_opinions = ( - [opinions[-1]] - if opinions and opinions[-1]["type"] == content[0].get("type") - else [] - ) - - if relevant_opinions: - relevant_opinions[-1]["opinion"] += "\n" + "\n".join( - [f.get("opinion") for f in content if f.get("opinion")] - ) - - else: - # No relevant opinions found, create a new opinion with the content - opinion_content = "\n".join( - [f.get("opinion") for f in content if f.get("opinion")] - ) - new_opinion = { - "byline": None, - "type": content[0].get("type"), - "opinion": opinion_content, - "order": current_order, - "per_curiam": is_per_curiam_opinion(opinion_content, None), - } - opinions.append(new_opinion) - current_order = current_order + 1 - - return opinions, current_order - - -def process_extracted_opinions(extracted_opinions: list) -> list: - """We read the extracted data in extract_opinions function to merge all - possible floating opinions (it is not explicitly defined within an opinion - tag or doesn't have an author) - - :param extracted_opinions: list of opinions obtained from xml file - :return: a list with extracted and processed opinions - """ - - opinions: list = [] - authorless_content = [] - order = 0 - - for i, found_content in enumerate(extracted_opinions, start=1): - byline = found_content.get("byline") - if not byline: - # Opinion has no byline, store opinion content - authorless_content.append(found_content) - - if byline: - # Opinion has byline, get opinion type and content - opinion_type = found_content.get("type") - opinion_content = found_content.get("opinion", "") - # Store content that doesn't match the current opinion type - alternative_authorless_content = [ - content - for content in authorless_content - if content.get("type") != opinion_type - ] - # Keep content that matches the current type - authorless_content = [ - op_content - for op_content in authorless_content - if op_content.get("type") == opinion_type - ] - - if alternative_authorless_content: - # Keep floating text that are not from the same type, - # we need to create a separate opinion for those, - # for example: in 2713f39c5a8e8684.xml we have an opinion - # without an author, and the next opinion with an author is - # a dissent opinion, we can't combine both - opinions, order = merge_opinions( - opinions, alternative_authorless_content, order - ) 
- - opinion_content = ( - "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("type") == opinion_type - ] - ) - + "\n\n" - + opinion_content - ) - - # Add new opinion - new_opinion = { - "byline": byline, - "type": opinion_type, - "opinion": opinion_content, - "order": order, - "per_curiam": is_per_curiam_opinion(opinion_content, byline), - } - - opinions.append(new_opinion) - order = order + 1 - authorless_content = [] - - if len(extracted_opinions) == i and authorless_content: - # If is the last opinion, and we still have opinions without - # byline, create an opinion without an author and the contents - # that couldn't be merged - opinions, order = merge_opinions( - opinions, authorless_content, order - ) - - return opinions - - -def map_opinion_types(opinions=None) -> None: - """Map opinion type to model field choice - - :param opinions: a list that contains all opinions as dict elements - :return: None - """ - - if opinions is None: - opinions = [] - lead = False - for op in opinions: - op_type = op.get("type") - # Only first opinion with "opinion" type is a lead opinion, the next - # opinion with "opinion" type is an addendum - if not lead and op_type and op_type == "opinion": - lead = True - op["type"] = "020lead" - continue - elif lead and op_type and op_type == "opinion": - op["type"] = "050addendum" - elif op_type and op_type == "dissent": - op["type"] = "040dissent" - elif op_type and op_type == "concurrence": - op["type"] = "030concurrence" - - -def match_opinion_lists( - file_opinions_list: list[Any], cl_opinions_list: list[Any] -) -> dict[int, int]: - """Try to match the opinions on two lists and generate a dict with position of - matching opinions - - Remove non-alphanumeric and non-whitespace characters from lowercased text, - this tries to make both texts in equal conditions to prove if both are similar or - equal - - get_cosine_similarity works great when both texts are almost the same with very - small variations - - Sometimes cosine similarity fails when there are small variations in text, - such as parties, attorneys, case name, or court that are included in the content - of the opinion, compare_documents() checks the percentage of the file opinion - text that it is in courtlistener opinion, having a large percentage means that - almost all the file opinion is in courtlistener opinion, but there is a - possibility that the courtlistener opinion contains some additional data in que - opinion content (such as case name, parties, etc.) - - compare_documents works good when the opinion from the file is a subset of the - opinion in CL, the percentage represents how much of the opinion of the file is - in the opinion from cl (content in cl opinion can have other data in the body - like posture, attorneys, etc. e.g. in cluster id: 7643871 we have the posture and - the opinion text but in the xml file we only have the opinion text, cosine_sim: - 0.1639075094124459 and percent_match: 73) - - Sometimes one algorithm performs better than the other, this is due to some - additional text, such as editor's notes, or the author, page number or posture - added to the opinion - - Key is opinion position from file, Value is opinion position from cl opinion e.g. 
- matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file opinion and - 2 is cl opinion - - :param file_opinions_list: Opinions from file - :param cl_opinions_list: CL opinions - :return: Matches if found or empty dict - """ - - scores = similarity_scores(file_opinions_list, cl_opinions_list) - - matches = {} - for i, row in enumerate(scores): - j = row.argmax() # type: ignore - file_opinion = re.sub( - r"[^a-zA-Z0-9 ]", "", file_opinions_list[i].lower() - ) - cl_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", cl_opinions_list[j].lower()) - - cosine_sim = get_cosine_similarity(file_opinion, cl_opinion) - - percent_match = compare_documents(file_opinion, cl_opinion) - - if cosine_sim < 0.60 and percent_match < 60: - continue - - matches[i] = j - - return matches - - def clean_opinion_content(text: str) -> str: """Clean opinion content @@ -424,9 +42,6 @@ def clean_opinion_content(text: str) -> str: return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower()) -# TODO ------------------------ remove until here ------------------------------- - - def get_opinions_cleaned_content( cluster_id, ) -> tuple[Optional[str], list[dict], int, bool]: @@ -531,6 +146,8 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None: """We assume that harvard data is already ordered, we just need to fill the order field in each opinion + The harvard importer created the opinions in order of appearance in the file + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value :return: None @@ -795,25 +412,23 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - if options["process_harvard"] and options["process_columbia"]: - print( - "You can only select one option process-harvard or process-columbia" + + if not options["process_harvard"] and not options["process_columbia"]: + logger.info( + "One option required: process-harvard or process-columbia" ) return - if not options["process_harvard"] and not options["process_columbia"]: - print("One option required: process-harvard or process-columbia") + if options["process_harvard"] and options["process_columbia"]: + logger.info( + "You can only select one option process-harvard or process-columbia" + ) return if options["process_harvard"]: sort_harvard_opinions(options["start_id"], options["end_id"]) - if options["process_columbia"] and options["xml_dir"]: + if options["process_columbia"]: sort_columbia_opinions( options["start_id"], options["end_id"], options["xml_dir"] ) - - if options["process_columbia"] and not options["xml_dir"]: - print( - "Argument --xml-dir required to read xml files from mounted directory" - ) From 48184509bf03ca11ee2d3a75ff7bbb6ffc5f809f Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 19 Jul 2024 16:12:23 -0400 Subject: [PATCH 066/372] feat(UI): Update state court picker Add territories as its own section in the state court picker Also - remove code that bundles courts and move it to a template filter to simplify things. 
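
A minimal sketch of the grouping behavior this aims for (hypothetical Court
stand-ins; the real filter, group_courts, is in the diff below):

    from dataclasses import dataclass

    @dataclass
    class FakeCourt:
        pk: str
        jurisdiction: str  # "S" = state supreme, "SA" = state appellate

    courts = [
        FakeCourt("cal", "S"), FakeCourt("calctapp", "SA"),
        FakeCourt("colo", "S"), FakeCourt("conn", "S"),
    ]
    # Columns stay contiguous, and each column break is nudged forward so
    # that a column only starts on an "S"/"TS" court:
    # group_courts(courts, 2) -> [[cal, calctapp], [colo, conn]]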
---
 cl/custom_filters/templatetags/extras.py    | 38 ++++++++++++++++++-
 cl/lib/search_utils.py                      | 25 +++---------
 .../includes/jurisdiction_picker_modal.html | 36 +++++++++++-------
 3 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py
index 40d2813cda..2624e81cb6 100644
--- a/cl/custom_filters/templatetags/extras.py
+++ b/cl/custom_filters/templatetags/extras.py
@@ -10,7 +10,7 @@
 from django.utils.safestring import SafeString, mark_safe
 from elasticsearch_dsl import AttrDict, AttrList
 
-from cl.search.models import Docket, DocketEntry
+from cl.search.models import Court, Docket, DocketEntry
 
 register = template.Library()
 
@@ -243,3 +243,39 @@ def get_highlight(result: AttrDict | dict[str, any], field: str) -> any:
         original_value = result.get(field, "")
 
     return render_string_or_list(hl_value) if hl_value else original_value
+
+
+@register.filter
+def group_courts(courts: list[Court], num_columns: int) -> list:
+    """Divide courts into equal groups while keeping related courts together
+
+    :param courts: Courts to group.
+    :param num_columns: Number of groups wanted
+    :return: The courts grouped together
+    """
+
+    column_len = len(courts) // num_columns
+    remainder = len(courts) % num_columns
+
+    groups = []
+    start = 0
+    for index in range(num_columns):
+        # Calculate the end index for this chunk
+        end = start + column_len + (1 if index < remainder else 0)
+
+        # Find the next 'TS' or 'S' starting point
+        while end < len(courts) and courts[end].jurisdiction not in [
+            "TS",
+            "S",
+        ]:
+            end += 1
+
+        # Adjust the chunk to start with 'TS' or 'S'
+        while start < end and courts[start].jurisdiction not in ["TS", "S"]:
+            start += 1
+
+        # Create the column and add it to the result
+        groups.append(courts[start:end])
+        start = end
+
+    return groups
diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py
index 5a3fdb6afb..affb89318e 100644
--- a/cl/lib/search_utils.py
+++ b/cl/lib/search_utils.py
@@ -233,8 +233,8 @@ def merge_form_with_courts(
     }
     bap_bundle = []
     b_bundle = []
-    state_bundle: List = []
-    state_bundles = []
+    states = []
+    territories = []
     for court in courts:
         if court.jurisdiction == Court.FEDERAL_APPELLATE:
             court_tabs["federal"].append(court)
@@ -247,15 +247,9 @@ def merge_form_with_courts(
             else:
                 b_bundle.append(court)
         elif court.jurisdiction in Court.STATE_JURISDICTIONS:
-            # State courts get bundled by supreme courts
-            if court.jurisdiction == Court.STATE_SUPREME:
-                # Whenever we hit a state supreme court, we append the
-                # previous bundle and start a new one.
-                if state_bundle:
-                    state_bundles.append(state_bundle)
-                state_bundle = [court]
-            else:
-                state_bundle.append(court)
+            states.append(court)
+        elif court.jurisdiction in Court.TERRITORY_JURISDICTIONS:
+            territories.append(court)
         elif court.jurisdiction in [
             Court.FEDERAL_SPECIAL,
             Court.COMMITTEE,
@@ -265,18 +259,11 @@ def merge_form_with_courts(
         ]:
             court_tabs["special"].append(court)
 
-    # append the final state bundle after the loop ends. Hack?
-    state_bundles.append(state_bundle)
-
     # Put the bankruptcy bundles in the courts dict
     if bap_bundle:
         court_tabs["bankruptcy_panel"] = [bap_bundle]
     court_tabs["bankruptcy"] = [b_bundle]
-
-    # Divide the state bundles into the correct partitions
-    court_tabs["state"].append(state_bundles[:17])
-    court_tabs["state"].append(state_bundles[17:34])
-    court_tabs["state"].append(state_bundles[34:])
+    court_tabs["state"] = [states, territories]
 
     return court_tabs, court_count_human, court_count
 
diff --git a/cl/search/templates/includes/jurisdiction_picker_modal.html b/cl/search/templates/includes/jurisdiction_picker_modal.html
index 842337c0da..1cf3800812 100644
--- a/cl/search/templates/includes/jurisdiction_picker_modal.html
+++ b/cl/search/templates/includes/jurisdiction_picker_modal.html
@@ -1,4 +1,5 @@
 {% load partition_util %}
+{% load extras %}
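Further down, the modal template can consume the filter directly. A hedged sketch of what that call might look like — the variable name, column count, and markup are assumptions for illustration, not the actual template code:

    {# Split the state courts into three columns; per the filter above, #}
    {# each column starts at a supreme ("S") or territory supreme ("TS") court. #}
    {% for column in state_courts|group_courts:3 %}
      <div class="col-sm-4">
        {% for court in column %}
          <label>
            <input type="checkbox" name="court_{{ court.pk }}"> {{ court.short_name }}
          </label>
        {% endfor %}
      </div>
    {% endfor %}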