From fd9f7e22926d2d4d7ea52241c3a6387394f56e1d Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 27 Sep 2023 20:40:28 -0400 Subject: [PATCH 01/41] recap/models.py: xref UPLOAD_TYPE to recap-chrome's list --- cl/recap/models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cl/recap/models.py b/cl/recap/models.py index c3a570f4b8..fa84195ef4 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -9,6 +9,10 @@ class UPLOAD_TYPE: + """This enumeration of upload types should be in sync with + recap-chrome/src/pacer.js: UPLOAD_TYPES [plural, sic] + """ + DOCKET = 1 ATTACHMENT_PAGE = 2 PDF = 3 From cb94c90e5219fc8725fc08ce21ce190c4fbc46ac Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 27 Sep 2023 20:49:08 -0400 Subject: [PATCH 02/41] recap/models.py: ACMS json is upload type 16 --- cl/recap/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cl/recap/models.py b/cl/recap/models.py index fa84195ef4..df758b8ecd 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -28,6 +28,7 @@ class UPLOAD_TYPE: APPELLATE_CASE_QUERY_PAGE = 13 CASE_QUERY_RESULT_PAGE = 14 APPELLATE_CASE_QUERY_RESULT_PAGE = 15 + ACMS_DOCKET_JSON = 16 NAMES = ( (DOCKET, "HTML Docket"), (ATTACHMENT_PAGE, "HTML attachment page"), @@ -44,6 +45,7 @@ class UPLOAD_TYPE: (APPELLATE_CASE_QUERY_PAGE, "Appellate Case query page"), (CASE_QUERY_RESULT_PAGE, "Case query result page"), (APPELLATE_CASE_QUERY_RESULT_PAGE, "Appellate Case query result page"), + (ACMS_DOCKET_JSON, "ACMS docket JSON object"), ) From 85325c88571b2e3ab30e8cf0e0be27a864744aca Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 28 Sep 2023 16:46:39 -0400 Subject: [PATCH 03/41] recap/tasks.py: Handle ACMS_DOCKET_JSON --- cl/recap/tasks.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/cl/recap/tasks.py b/cl/recap/tasks.py index c3a505ad0e..72bdf4e770 100644 --- a/cl/recap/tasks.py +++ b/cl/recap/tasks.py @@ -23,6 +23,7 @@ from juriscraper.lib.exceptions import PacerLoginException, ParsingException from juriscraper.lib.string_utils import CaseNameTweaker, harmonize from juriscraper.pacer import ( + ACMSDocketReport, AppellateDocketReport, AttachmentPage, CaseQuery, @@ -140,6 +141,8 @@ async def process_recap_upload(pq: ProcessingQueue) -> None: await sync_to_async(process_recap_appellate_case_query_result_page)( pq.pk ) + elif pq.upload_type == UPLOAD_TYPE.ACMS_DOCKET_JSON: + docket = await process_recap_acms_docket(pq.pk) def do_pacer_fetch(fq: PacerFetchQueue): @@ -975,6 +978,12 @@ def parse_appellate_text(court_id, text): return report.data +def parse_acms_json(court_id, json): + report = ACMSDocketReport(court_id) + report._parse_text(json) + return report.data + + async def process_recap_appellate_docket(pk): """Process an uploaded appellate docket from the RECAP API endpoint. @@ -1074,6 +1083,102 @@ async def process_recap_appellate_docket(pk): } +async def process_recap_acms_docket(pk): + """Process uploaded ACMS appellate docket JSON from the RECAP API endpoint. + + :param pk: The primary key of the processing queue item you want to work + on. + :returns: A dict of the form: + + { + // The PK of the docket that's created or updated + 'docket_pk': 22, + // A boolean indicating whether a new docket entry or + // recap document was created (implying a Solr needs + // updating). + 'content_updated': True, + } + + This value is a dict so that it can be ingested in a Celery chain. 
+ + """ + start_time = now() + pq = await ProcessingQueue.objects.aget(pk=pk) + await mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS) + logger.info(f"Processing ACMS RECAP item (debug is: {pq.debug}): {pq}") + + try: + text = pq.filepath_local.read().decode() + except IOError as exc: + msg = f"Internal processing error ({exc.errno}: {exc.strerror})." + await mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED) + return None + + if process.current_process().daemon: + # yyy + data = parse_appellate_text(map_cl_to_pacer_id(pq.court_id), text) + else: + with concurrent.futures.ProcessPoolExecutor() as pool: + data = await asyncio.get_running_loop().run_in_executor( + pool, + parse_appellate_text, + map_cl_to_pacer_id(pq.court_id), + text, + ) + logger.info(f"Parsing completed of item {pq}") + + if data == {}: + # Not really a docket. Some sort of invalid document (see Juriscraper). + msg = "Not a valid docket upload." + await mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT) + return None + + # Merge the contents of the docket into CL. + d = await find_docket_object( + pq.court_id, pq.pacer_case_id, data["docket_number"] + ) + + d.add_recap_source() + await update_docket_metadata(d, data) + d, og_info = await update_docket_appellate_metadata(d, data) + if not d.pacer_case_id: + d.pacer_case_id = pq.pacer_case_id + + if pq.debug: + await mark_pq_successful(pq, d_id=d.pk) + return {"docket_pk": d.pk, "content_updated": False} + + if og_info is not None: + await og_info.asave() + d.originating_court_information = og_info + await d.asave() + + # Add the JSON to the docket in case we need it someday. + # xxx PacerHtmlFiles is ok for JSON? + pacer_file = await PacerHtmlFiles.objects.acreate( + content_object=d, upload_type=UPLOAD_TYPE.APPELLATE_DOCKET + ) + await sync_to_async(pacer_file.filepath.save)( + "docket.html", # We only care about the ext w/S3PrivateUUIDStorageTest + ContentFile(text.encode()), + ) + + des_returned, rds_created, content_updated = await add_docket_entries( + d, data["docket_entries"] + ) + await sync_to_async(add_parties_and_attorneys)(d, data["parties"]) + await process_orphan_documents(rds_created, pq.court_id, d.date_filed) + if content_updated: + newly_enqueued = enqueue_docket_alert(d.pk) + if newly_enqueued: + await sync_to_async(send_alert_and_webhook.delay)(d.pk, start_time) + await mark_pq_successful(pq, d_id=d.pk) + return { + "docket_pk": d.pk, + "content_updated": bool(rds_created or content_updated), + } + + async def process_recap_appellate_attachment( pk: int, ) -> Optional[Tuple[int, str, list[RECAPDocument]]]: From 3b6644c27acdd24c989f57916f9108d34c159323 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 28 Sep 2023 16:47:49 -0400 Subject: [PATCH 04/41] recap/models.py: teach ProcessingQueue() about ACMS JSON --- cl/recap/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cl/recap/models.py b/cl/recap/models.py index df758b8ecd..08672f43a3 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -199,6 +199,7 @@ def __str__(self) -> str: UPLOAD_TYPE.DOCKET_HISTORY_REPORT, UPLOAD_TYPE.APPELLATE_DOCKET, UPLOAD_TYPE.DOCUMENT_ZIP, + UPLOAD_TYPE.ACMS_DOCKET_JSON, ]: return "ProcessingQueue %s: %s case #%s (%s)" % ( self.pk, From 66e1e497195cf2bad192f913be66876ebba71ab1 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 2 Oct 2023 15:28:12 -0300 Subject: [PATCH 05/41] fix(recap): Added upload type noop migration. 
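For context on why the paired .sql file is empty: Django enforces
`choices` in Python during model validation, not with a database
constraint, so adding choice 16 changes application state only and the
generated DDL is a no-op. A minimal sketch of that behavior against the
field itself (the `99` is just an arbitrary out-of-range value):

    from django.core.exceptions import ValidationError

    from cl.recap.models import UPLOAD_TYPE, ProcessingQueue

    field = ProcessingQueue._meta.get_field("upload_type")
    field.clean(UPLOAD_TYPE.ACMS_DOCKET_JSON, None)  # 16 is now a valid choice
    try:
        field.clean(99, None)  # not in UPLOAD_TYPE.NAMES
    except ValidationError:
        pass  # rejected in Python; Postgres never sees a CHECK constraint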
---
 ..._alter_processingqueue_upload_type_noop.py | 65 +++++++++++++++++++
 ...alter_processingqueue_upload_type_noop.sql | 10 +++
 2 files changed, 75 insertions(+)
 create mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py
 create mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql

diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py
new file mode 100644
index 0000000000..d4a0d142b6
--- /dev/null
+++ b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py
@@ -0,0 +1,65 @@
+# Generated by Django 4.2.4 on 2023-10-02 18:25
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        (
+            "recap",
+            "0012_rename_fjcintegrateddatabase_district_docket_number_recap_fjcin_distric_731c7b_idx",
+        ),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="pacerhtmlfiles",
+            name="upload_type",
+            field=models.SmallIntegerField(
+                choices=[
+                    (1, "HTML Docket"),
+                    (2, "HTML attachment page"),
+                    (3, "PDF"),
+                    (4, "Docket history report"),
+                    (5, "Appellate HTML docket"),
+                    (6, "Appellate HTML attachment page"),
+                    (7, "Internet Archive XML docket"),
+                    (8, "Case report (iquery.pl) page"),
+                    (9, "Claims register page"),
+                    (10, "Zip archive of RECAP Documents"),
+                    (11, "Email in the SES storage format"),
+                    (12, "Case query page"),
+                    (13, "Appellate Case query page"),
+                    (14, "Case query result page"),
+                    (15, "Appellate Case query result page"),
+                    (16, "ACMS docket JSON object"),
+                ],
+                help_text="The type of object that is uploaded",
+            ),
+        ),
+        migrations.AlterField(
+            model_name="processingqueue",
+            name="upload_type",
+            field=models.SmallIntegerField(
+                choices=[
+                    (1, "HTML Docket"),
+                    (2, "HTML attachment page"),
+                    (3, "PDF"),
+                    (4, "Docket history report"),
+                    (5, "Appellate HTML docket"),
+                    (6, "Appellate HTML attachment page"),
+                    (7, "Internet Archive XML docket"),
+                    (8, "Case report (iquery.pl) page"),
+                    (9, "Claims register page"),
+                    (10, "Zip archive of RECAP Documents"),
+                    (11, "Email in the SES storage format"),
+                    (12, "Case query page"),
+                    (13, "Appellate Case query page"),
+                    (14, "Case query result page"),
+                    (15, "Appellate Case query result page"),
+                    (16, "ACMS docket JSON object"),
+                ],
+                help_text="The type of object that is uploaded",
+            ),
+        ),
+    ]
diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql
new file mode 100644
index 0000000000..b0d9d1e378
--- /dev/null
+++ b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql
@@ -0,0 +1,10 @@
+BEGIN;
+--
+-- Alter field upload_type on pacerhtmlfiles
+--
+-- (no-op)
+--
+-- Alter field upload_type on processingqueue
+--
+-- (no-op)
+COMMIT;

From 61a3eb22a32887eb372dcf25bf8d9c41017ce1c7 Mon Sep 17 00:00:00 2001
From: John Hawkinson
Date: Mon, 2 Oct 2023 18:57:26 -0400
Subject: [PATCH 06/41] recap: Relax `pacer_case_id` dash/hyphen constraint

Allow dashes in `pacer_case_id` as long as there is not exactly one.
The concern here is that a single dash (aka hyphen) can look like a
docket number, and there was a point in time when this field was
overloaded with appellate docket numbers.

Explain a little better why this check exists.

Add a test.
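To make the new rule concrete: CM/ECF case IDs are plain integers, ACMS
case IDs are GUIDs containing four dashes, and only a value with exactly
one dash reads like an appellate docket number. A sketch of the
heuristic (sample IDs are illustrative; the GUID is the value used in
the new test below):

    def looks_like_docket_number(pacer_case_id: str) -> bool:
        # Mirrors the serializer change: exactly one dash is suspicious.
        return pacer_case_id.count("-") == 1

    assert not looks_like_docket_number("56120")  # CM/ECF-style case ID
    assert not looks_like_docket_number(
        "34cacf7f-52d5-4d1f-b4f0-0542b429f674"  # ACMS GUID: four dashes
    )
    assert looks_like_docket_number("23-5432")  # docket-number-like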
--- cl/recap/api_serializers.py | 6 ++++-- cl/recap/tests.py | 23 +++++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/cl/recap/api_serializers.py b/cl/recap/api_serializers.py index 9d3cd6f463..ee1423a3e4 100644 --- a/cl/recap/api_serializers.py +++ b/cl/recap/api_serializers.py @@ -161,9 +161,11 @@ def validate(self, attrs): "uploads." ) - if "-" in attrs.get("pacer_case_id"): + dashes = attrs.get("pacer_case_id").count("-") + if dashes == 1: raise ValidationError( - "PACER case ID can not contains dashes -" + "PACER case ID can not contain a single (-); " + "that looks like a docket number." ) return attrs diff --git a/cl/recap/tests.py b/cl/recap/tests.py index cbe63debea..a3e2f694bc 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -605,7 +605,26 @@ def test_recap_upload_validate_pacer_case_id(self, mock): j = json.loads(r.content) self.assertEqual(r.status_code, HTTP_400_BAD_REQUEST) self.assertIn( - "PACER case ID can not contains dashes -", j["non_field_errors"][0] + "PACER case ID can not contain a single (-); looks like a docket number.", + j["non_field_errors"][0], + ) + + def test_recap_upload_validate_acms_pacer_case_id(self, mock): + """Can we properly validate the pacer_case_id doesn't contain a dash -?""" + self.data.update( + { + "upload_type": UPLOAD_TYPE.ACMS_DOCKET_JSON, + "document_number": "", + "pacer_case_id": "34cacf7f-52d5-4d1f-b4f0-0542b429f674", + } + ) + del self.data["pacer_doc_id"] + r = self.client.post(self.path, self.data) + j = json.loads(r.content) + self.assertEqual(r.status_code, HTTP_400_BAD_REQUEST) + self.assertIn( + "PACER case ID can not contain a single (-); looks like a docket number.", + j["non_field_errors"][0], ) @@ -759,7 +778,7 @@ def test_recap_fetch_validate_pacer_case_id(self, mock): serialized_fq.is_valid() self.assertIn( serialized_fq.errors["non_field_errors"][0], - "PACER case ID can not contains dashes -", + "PACER case ID can not contain a single (-); looks like a docket number.", ) def test_key_serialization_with_client_code(self, mock) -> None: From f28f30cfcc8992b3a3fafc8cb98a15c7c81f0ab0 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Tue, 3 Oct 2023 12:22:27 -0400 Subject: [PATCH 07/41] recap/models.py: Fix typo in `pacer_case_id` help_text As long as we are doing a migration for this PR, @albertisfu, can you include this fix? Or not, if it complicates your world. --- cl/recap/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cl/recap/models.py b/cl/recap/models.py index 08672f43a3..32e2530de1 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -119,7 +119,7 @@ class ProcessingQueue(AbstractDateTimeModel): on_delete=models.RESTRICT, ) pacer_case_id = models.CharField( - help_text="The cased ID provided by PACER.", + help_text="The case ID provided by PACER.", max_length=100, db_index=True, blank=True, From aa69a7d5d7e0087ddca45a7db7ed44131805dbb6 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Tue, 3 Oct 2023 12:26:07 -0400 Subject: [PATCH 08/41] recap/tests.py: oops, typo in test string You'd think I would have caught this earlier, oops. 
--- cl/recap/tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/recap/tests.py b/cl/recap/tests.py index a3e2f694bc..7463c15a9d 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -605,7 +605,7 @@ def test_recap_upload_validate_pacer_case_id(self, mock): j = json.loads(r.content) self.assertEqual(r.status_code, HTTP_400_BAD_REQUEST) self.assertIn( - "PACER case ID can not contain a single (-); looks like a docket number.", + "PACER case ID can not contain a single (-); that looks like a docket number.", j["non_field_errors"][0], ) @@ -623,7 +623,7 @@ def test_recap_upload_validate_acms_pacer_case_id(self, mock): j = json.loads(r.content) self.assertEqual(r.status_code, HTTP_400_BAD_REQUEST) self.assertIn( - "PACER case ID can not contain a single (-); looks like a docket number.", + "PACER case ID can not contain a single (-); that looks like a docket number.", j["non_field_errors"][0], ) From b4e2a0d9451191059e09a9f34e98872156986d79 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 3 Oct 2023 15:37:05 -0300 Subject: [PATCH 09/41] fix(recap): Updated ProcessingQueue migrations. - Fixed pacer_case_id typo in migration. - Increased pacer_doc_id length to 64 - Added new field acms_document_guid - Added new UploadType --- ...alter_processingqueue_upload_type_noop.sql | 10 ----- ...noop.py => 0013_processingqueue_update.py} | 38 ++++++++++++++++++- .../0013_processingqueue_update.sql | 27 +++++++++++++ cl/recap/models.py | 10 ++++- 4 files changed, 73 insertions(+), 12 deletions(-) delete mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql rename cl/recap/migrations/{0013_alter_processingqueue_upload_type_noop.py => 0013_processingqueue_update.py} (67%) create mode 100644 cl/recap/migrations/0013_processingqueue_update.sql diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql deleted file mode 100644 index b0d9d1e378..0000000000 --- a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN; --- --- Alter field upload_type on pacerhtmlfiles --- --- (no-op) --- --- Alter field upload_type on processingqueue --- --- (no-op) -COMMIT; diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py b/cl/recap/migrations/0013_processingqueue_update.py similarity index 67% rename from cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py rename to cl/recap/migrations/0013_processingqueue_update.py index d4a0d142b6..044cd226df 100644 --- a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py +++ b/cl/recap/migrations/0013_processingqueue_update.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.4 on 2023-10-02 18:25 +# Generated by Django 4.2.4 on 2023-10-03 18:29 from django.db import migrations, models @@ -12,6 +12,15 @@ class Migration(migrations.Migration): ] operations = [ + migrations.AddField( + model_name="processingqueue", + name="acms_document_guid", + field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), migrations.AlterField( model_name="pacerhtmlfiles", name="upload_type", @@ -37,6 +46,26 @@ class Migration(migrations.Migration): help_text="The type of object that is uploaded", ), ), + migrations.AlterField( + model_name="processingqueue", + name="pacer_case_id", + field=models.CharField( + blank=True, + db_index=True, + help_text="The case ID provided by PACER.", + max_length=100, 
+ ), + ), + migrations.AlterField( + model_name="processingqueue", + name="pacer_doc_id", + field=models.CharField( + blank=True, + db_index=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), migrations.AlterField( model_name="processingqueue", name="upload_type", @@ -62,4 +91,11 @@ class Migration(migrations.Migration): help_text="The type of object that is uploaded", ), ), + migrations.AddIndex( + model_name="processingqueue", + index=models.Index( + fields=["acms_document_guid"], + name="recap_proce_acms_do_2e7cae_idx", + ), + ), ] diff --git a/cl/recap/migrations/0013_processingqueue_update.sql b/cl/recap/migrations/0013_processingqueue_update.sql new file mode 100644 index 0000000000..8e2008604b --- /dev/null +++ b/cl/recap/migrations/0013_processingqueue_update.sql @@ -0,0 +1,27 @@ +BEGIN; +-- +-- Add field acms_document_guid to processingqueue +-- +ALTER TABLE "recap_processingqueue" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "recap_processingqueue" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Alter field upload_type on pacerhtmlfiles +-- +-- (no-op) +-- +-- Alter field pacer_case_id on processingqueue +-- +-- (no-op) +-- +-- Alter field pacer_doc_id on processingqueue +-- +ALTER TABLE "recap_processingqueue" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +-- +-- Alter field upload_type on processingqueue +-- +-- (no-op) +-- +-- Create index recap_proce_acms_do_2e7cae_idx on field(s) acms_document_guid of model processingqueue +-- +CREATE INDEX "recap_proce_acms_do_2e7cae_idx" ON "recap_processingqueue" ("acms_document_guid"); +COMMIT; diff --git a/cl/recap/models.py b/cl/recap/models.py index 32e2530de1..840048c6f9 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -126,10 +126,15 @@ class ProcessingQueue(AbstractDateTimeModel): ) pacer_doc_id = models.CharField( help_text="The ID of the document in PACER.", - max_length=32, # Same as in RECAP + max_length=64, # Increased to support storing docketEntryId from ACMS. blank=True, db_index=True, ) + acms_document_guid = models.CharField( + help_text="The ID of the document in PACER.", + max_length=64, + blank=True, + ) document_number = models.BigIntegerField( help_text="The docket entry number for the document.", blank=True, @@ -223,6 +228,9 @@ class Meta: permissions = ( ("has_recap_upload_access", "Can upload documents to RECAP."), ) + indexes = [ + models.Index(fields=["acms_document_guid"]), + ] @property def file_contents(self) -> str: From a6f42abf1fa06b3dff074ece2e6b68843211ee13 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 3 Oct 2023 16:02:33 -0300 Subject: [PATCH 10/41] fix(search): Search model changes and migrations. - Fixed pacer_case_id typos. 
- Increased pacer_doc_id length to 64 in RECAPDocument - Added new field acms_document_guid in RECAPDocument --- .../0022_search_models_update _customers.sql | 34 +++ .../migrations/0022_search_models_update.py | 202 ++++++++++++++++++ .../migrations/0022_search_models_update.sql | 60 ++++++ cl/search/models.py | 20 +- 4 files changed, 309 insertions(+), 7 deletions(-) create mode 100644 cl/search/migrations/0022_search_models_update _customers.sql create mode 100644 cl/search/migrations/0022_search_models_update.py create mode 100644 cl/search/migrations/0022_search_models_update.sql diff --git a/cl/search/migrations/0022_search_models_update _customers.sql b/cl/search/migrations/0022_search_models_update _customers.sql new file mode 100644 index 0000000000..c99cfcaf77 --- /dev/null +++ b/cl/search/migrations/0022_search_models_update _customers.sql @@ -0,0 +1,34 @@ +BEGIN; + +-- +-- Add field acms_document_guid to claimhistory +-- +ALTER TABLE "search_claimhistory" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "search_claimhistory" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Add field acms_document_guid to recapdocument +-- +ALTER TABLE "search_recapdocument" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "search_recapdocument" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Alter field pacer_case_id on claimhistory +-- +-- (no-op) +-- +-- Alter field pacer_doc_id on claimhistory +-- +ALTER TABLE "search_claimhistory" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf" ON "search_claimhistory" ("pacer_doc_id"); +CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf_like" ON "search_claimhistory" ("pacer_doc_id" varchar_pattern_ops); +-- +-- Alter field pacer_case_id on docket +-- +-- (no-op) +-- +-- Alter field pacer_doc_id on recapdocument +-- +ALTER TABLE "search_recapdocument" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9" ON "search_recapdocument" ("pacer_doc_id"); +CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapdocument" ("pacer_doc_id" varchar_pattern_ops); + +COMMIT; diff --git a/cl/search/migrations/0022_search_models_update.py b/cl/search/migrations/0022_search_models_update.py new file mode 100644 index 0000000000..26e647e201 --- /dev/null +++ b/cl/search/migrations/0022_search_models_update.py @@ -0,0 +1,202 @@ +# Generated by Django 4.2.4 on 2023-10-03 18:41 + +import cl.lib.fields +from django.db import migrations, models +import pgtrigger.compiler +import pgtrigger.migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0021_add_pghistory_courthouse"), + ] + + operations = [ + pgtrigger.migrations.RemoveTrigger( + model_name="claimhistory", + name="update_or_delete_snapshot_delete", + ), + pgtrigger.migrations.RemoveTrigger( + model_name="claimhistory", + name="update_or_delete_snapshot_update", + ), + pgtrigger.migrations.RemoveTrigger( + model_name="recapdocument", + name="update_or_delete_snapshot_delete", + ), + pgtrigger.migrations.RemoveTrigger( + model_name="recapdocument", + name="update_or_delete_snapshot_update", + ), + migrations.AddField( + model_name="claimhistory", + name="acms_document_guid", + field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AddField( + model_name="claimhistoryevent", + name="acms_document_guid", + 
field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AddField( + model_name="recapdocument", + name="acms_document_guid", + field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AddField( + model_name="recapdocumentevent", + name="acms_document_guid", + field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AlterField( + model_name="claimhistory", + name="pacer_case_id", + field=models.CharField( + blank=True, + help_text="The case ID provided by PACER. Noted in this case on a per-document-level, since we've learned that some documents from other cases can appear in curious places.", + max_length=100, + ), + ), + migrations.AlterField( + model_name="claimhistory", + name="pacer_doc_id", + field=models.CharField( + blank=True, + db_index=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AlterField( + model_name="claimhistoryevent", + name="pacer_case_id", + field=models.CharField( + blank=True, + help_text="The case ID provided by PACER. Noted in this case on a per-document-level, since we've learned that some documents from other cases can appear in curious places.", + max_length=100, + ), + ), + migrations.AlterField( + model_name="claimhistoryevent", + name="pacer_doc_id", + field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AlterField( + model_name="docket", + name="pacer_case_id", + field=cl.lib.fields.CharNullField( + blank=True, + db_index=True, + help_text="The case ID provided by PACER.", + max_length=100, + null=True, + ), + ), + migrations.AlterField( + model_name="docketevent", + name="pacer_case_id", + field=cl.lib.fields.CharNullField( + blank=True, + help_text="The case ID provided by PACER.", + max_length=100, + null=True, + ), + ), + migrations.AlterField( + model_name="recapdocument", + name="pacer_doc_id", + field=models.CharField( + blank=True, + db_index=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + migrations.AlterField( + model_name="recapdocumentevent", + name="pacer_doc_id", + field=models.CharField( + blank=True, + help_text="The ID of the document in PACER.", + max_length=64, + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="claimhistory", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_update", + sql=pgtrigger.compiler.UpsertTriggerSql( + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."file_size" IS DISTINCT FROM (NEW."file_size") OR OLD."filepath_local" IS DISTINCT FROM (NEW."filepath_local") OR OLD."filepath_ia" IS DISTINCT FROM (NEW."filepath_ia") OR OLD."ia_upload_failure_count" IS DISTINCT FROM (NEW."ia_upload_failure_count") OR OLD."thumbnail" IS DISTINCT FROM (NEW."thumbnail") OR OLD."thumbnail_status" IS DISTINCT FROM (NEW."thumbnail_status") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."ocr_status" IS DISTINCT FROM (NEW."ocr_status") OR OLD."date_upload" IS DISTINCT FROM (NEW."date_upload") OR OLD."document_number" IS DISTINCT FROM (NEW."document_number") OR OLD."attachment_number" IS DISTINCT FROM (NEW."attachment_number") OR OLD."pacer_doc_id" IS DISTINCT FROM 
(NEW."pacer_doc_id") OR OLD."acms_document_guid" IS DISTINCT FROM (NEW."acms_document_guid") OR OLD."is_available" IS DISTINCT FROM (NEW."is_available") OR OLD."is_free_on_pacer" IS DISTINCT FROM (NEW."is_free_on_pacer") OR OLD."is_sealed" IS DISTINCT FROM (NEW."is_sealed") OR OLD."claim_id" IS DISTINCT FROM (NEW."claim_id") OR OLD."date_filed" IS DISTINCT FROM (NEW."date_filed") OR OLD."claim_document_type" IS DISTINCT FROM (NEW."claim_document_type") OR OLD."description" IS DISTINCT FROM (NEW."description") OR OLD."claim_doc_id" IS DISTINCT FROM (NEW."claim_doc_id") OR OLD."pacer_dm_id" IS DISTINCT FROM (NEW."pacer_dm_id") OR OLD."pacer_case_id" IS DISTINCT FROM (NEW."pacer_case_id"))', + func='INSERT INTO "search_claimhistoryevent" ("acms_document_guid", "attachment_number", "claim_doc_id", "claim_document_type", "claim_id", "date_created", "date_filed", "date_modified", "date_upload", "description", "document_number", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", "is_free_on_pacer", "is_sealed", "ocr_status", "pacer_case_id", "pacer_dm_id", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."claim_doc_id", OLD."claim_document_type", OLD."claim_id", OLD."date_created", OLD."date_filed", OLD."date_modified", OLD."date_upload", OLD."description", OLD."document_number", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_case_id", OLD."pacer_dm_id", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', + hash="5723c9a392630408dbd5fbc404478e5bb2743d68", + operation="UPDATE", + pgid="pgtrigger_update_or_delete_snapshot_update_137a5", + table="search_claimhistory", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="claimhistory", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_delete", + sql=pgtrigger.compiler.UpsertTriggerSql( + func='INSERT INTO "search_claimhistoryevent" ("acms_document_guid", "attachment_number", "claim_doc_id", "claim_document_type", "claim_id", "date_created", "date_filed", "date_modified", "date_upload", "description", "document_number", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", "is_free_on_pacer", "is_sealed", "ocr_status", "pacer_case_id", "pacer_dm_id", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."claim_doc_id", OLD."claim_document_type", OLD."claim_id", OLD."date_created", OLD."date_filed", OLD."date_modified", OLD."date_upload", OLD."description", OLD."document_number", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_case_id", OLD."pacer_dm_id", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', + 
hash="e9a4793fa17211e41a75f2e0925ce7dbc11608ab", + operation="DELETE", + pgid="pgtrigger_update_or_delete_snapshot_delete_5ec04", + table="search_claimhistory", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="recapdocument", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_update", + sql=pgtrigger.compiler.UpsertTriggerSql( + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."file_size" IS DISTINCT FROM (NEW."file_size") OR OLD."filepath_local" IS DISTINCT FROM (NEW."filepath_local") OR OLD."filepath_ia" IS DISTINCT FROM (NEW."filepath_ia") OR OLD."ia_upload_failure_count" IS DISTINCT FROM (NEW."ia_upload_failure_count") OR OLD."thumbnail" IS DISTINCT FROM (NEW."thumbnail") OR OLD."thumbnail_status" IS DISTINCT FROM (NEW."thumbnail_status") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."ocr_status" IS DISTINCT FROM (NEW."ocr_status") OR OLD."date_upload" IS DISTINCT FROM (NEW."date_upload") OR OLD."document_number" IS DISTINCT FROM (NEW."document_number") OR OLD."attachment_number" IS DISTINCT FROM (NEW."attachment_number") OR OLD."pacer_doc_id" IS DISTINCT FROM (NEW."pacer_doc_id") OR OLD."acms_document_guid" IS DISTINCT FROM (NEW."acms_document_guid") OR OLD."is_available" IS DISTINCT FROM (NEW."is_available") OR OLD."is_free_on_pacer" IS DISTINCT FROM (NEW."is_free_on_pacer") OR OLD."is_sealed" IS DISTINCT FROM (NEW."is_sealed") OR OLD."docket_entry_id" IS DISTINCT FROM (NEW."docket_entry_id") OR OLD."document_type" IS DISTINCT FROM (NEW."document_type") OR OLD."description" IS DISTINCT FROM (NEW."description"))', + func='INSERT INTO "search_recapdocumentevent" ("acms_document_guid", "attachment_number", "date_created", "date_modified", "date_upload", "description", "docket_entry_id", "document_number", "document_type", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", "is_free_on_pacer", "is_sealed", "ocr_status", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."date_created", OLD."date_modified", OLD."date_upload", OLD."description", OLD."docket_entry_id", OLD."document_number", OLD."document_type", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', + hash="8c76be0c2027b4cdd0d68099137b26d0c0dc9add", + operation="UPDATE", + pgid="pgtrigger_update_or_delete_snapshot_update_8a108", + table="search_recapdocument", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="recapdocument", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_delete", + sql=pgtrigger.compiler.UpsertTriggerSql( + func='INSERT INTO "search_recapdocumentevent" ("acms_document_guid", "attachment_number", "date_created", "date_modified", "date_upload", "description", "docket_entry_id", "document_number", "document_type", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", 
"is_free_on_pacer", "is_sealed", "ocr_status", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."date_created", OLD."date_modified", OLD."date_upload", OLD."description", OLD."docket_entry_id", OLD."document_number", OLD."document_type", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', + hash="ac0997c8155ad58d6e51c20c5c1e60564a6d9aab", + operation="DELETE", + pgid="pgtrigger_update_or_delete_snapshot_delete_c80e6", + table="search_recapdocument", + when="AFTER", + ), + ), + ), + ] diff --git a/cl/search/migrations/0022_search_models_update.sql b/cl/search/migrations/0022_search_models_update.sql new file mode 100644 index 0000000000..1fb83b9e4c --- /dev/null +++ b/cl/search/migrations/0022_search_models_update.sql @@ -0,0 +1,60 @@ +BEGIN; + +-- +-- Add field acms_document_guid to claimhistory +-- +ALTER TABLE "search_claimhistory" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "search_claimhistory" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Add field acms_document_guid to claimhistoryevent +-- +ALTER TABLE "search_claimhistoryevent" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "search_claimhistoryevent" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Add field acms_document_guid to recapdocument +-- +ALTER TABLE "search_recapdocument" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "search_recapdocument" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Add field acms_document_guid to recapdocumentevent +-- +ALTER TABLE "search_recapdocumentevent" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; +ALTER TABLE "search_recapdocumentevent" ALTER COLUMN "acms_document_guid" DROP DEFAULT; +-- +-- Alter field pacer_case_id on claimhistory +-- +-- (no-op) +-- +-- Alter field pacer_doc_id on claimhistory +-- +ALTER TABLE "search_claimhistory" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf" ON "search_claimhistory" ("pacer_doc_id"); +CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf_like" ON "search_claimhistory" ("pacer_doc_id" varchar_pattern_ops); +-- +-- Alter field pacer_case_id on claimhistoryevent +-- +-- (no-op) +-- +-- Alter field pacer_doc_id on claimhistoryevent +-- +ALTER TABLE "search_claimhistoryevent" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +-- +-- Alter field pacer_case_id on docket +-- +-- (no-op) +-- +-- Alter field pacer_case_id on docketevent +-- +-- (no-op) +-- +-- Alter field pacer_doc_id on recapdocument +-- +ALTER TABLE "search_recapdocument" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9" ON "search_recapdocument" ("pacer_doc_id"); +CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapdocument" ("pacer_doc_id" varchar_pattern_ops); +-- +-- Alter field pacer_doc_id on recapdocumentevent +-- +ALTER TABLE "search_recapdocumentevent" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); + +COMMIT; diff --git 
a/cl/search/models.py b/cl/search/models.py index 1c2556198c..a4a7b06ce7 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -651,7 +651,7 @@ class Docket(AbstractDateTimeModel): ) # Nullable for unique constraint requirements. pacer_case_id = fields.CharNullField( - help_text="The cased ID provided by PACER.", + help_text="The case ID provided by PACER.", max_length=100, blank=True, null=True, @@ -1344,11 +1344,14 @@ class AbstractPacerDocument(models.Model): null=True, ) pacer_doc_id = models.CharField( - help_text=( - "The ID of the document in PACER. This information is " - "provided by RECAP." - ), - max_length=32, # Same as in RECAP + help_text="The ID of the document in PACER.", + max_length=64, # Increased to support storing docketEntryId from ACMS. + blank=True, + db_index=True, + ) + acms_document_guid = models.CharField( + help_text="The ID of the document in PACER.", + max_length=64, blank=True, ) is_available = models.BooleanField( @@ -1369,6 +1372,9 @@ class AbstractPacerDocument(models.Model): class Meta: abstract = True + indexes = [ + models.Index(fields=["acms_document_guid"]), + ] @pghistory.track(AfterUpdateOrDeleteSnapshot()) @@ -2006,7 +2012,7 @@ class ClaimHistory(AbstractPacerDocument, AbstractPDF, AbstractDateTimeModel): ) pacer_case_id = models.CharField( help_text=( - "The cased ID provided by PACER. Noted in this case on a " + "The case ID provided by PACER. Noted in this case on a " "per-document-level, since we've learned that some " "documents from other cases can appear in curious places." ), From a6ceec39be0947dae520dfc3c7f4c106fc3b4c5e Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Thu, 28 Sep 2023 16:46:39 -0400 Subject: [PATCH 11/41] recap/tasks.py: Handle ACMS_DOCKET_JSON --- cl/recap/tasks.py | 96 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/cl/recap/tasks.py b/cl/recap/tasks.py index 72bdf4e770..8036782752 100644 --- a/cl/recap/tasks.py +++ b/cl/recap/tasks.py @@ -1179,6 +1179,102 @@ async def process_recap_acms_docket(pk): } +async def process_recap_acms_docket(pk): + """Process uploaded ACMS appellate docket JSON from the RECAP API endpoint. + + :param pk: The primary key of the processing queue item you want to work + on. + :returns: A dict of the form: + + { + // The PK of the docket that's created or updated + 'docket_pk': 22, + // A boolean indicating whether a new docket entry or + // recap document was created (implying a Solr needs + // updating). + 'content_updated': True, + } + + This value is a dict so that it can be ingested in a Celery chain. + + """ + start_time = now() + pq = await ProcessingQueue.objects.aget(pk=pk) + await mark_pq_status(pq, "", PROCESSING_STATUS.IN_PROGRESS) + logger.info(f"Processing ACMS RECAP item (debug is: {pq.debug}): {pq}") + + try: + text = pq.filepath_local.read().decode() + except IOError as exc: + msg = f"Internal processing error ({exc.errno}: {exc.strerror})." + await mark_pq_status(pq, msg, PROCESSING_STATUS.FAILED) + return None + + if process.current_process().daemon: + # yyy + data = parse_appellate_text(map_cl_to_pacer_id(pq.court_id), text) + else: + with concurrent.futures.ProcessPoolExecutor() as pool: + data = await asyncio.get_running_loop().run_in_executor( + pool, + parse_appellate_text, + map_cl_to_pacer_id(pq.court_id), + text, + ) + logger.info(f"Parsing completed of item {pq}") + + if data == {}: + # Not really a docket. Some sort of invalid document (see Juriscraper). + msg = "Not a valid docket upload." 
+ await mark_pq_status(pq, msg, PROCESSING_STATUS.INVALID_CONTENT) + return None + + # Merge the contents of the docket into CL. + d = await find_docket_object( + pq.court_id, pq.pacer_case_id, data["docket_number"] + ) + + d.add_recap_source() + await update_docket_metadata(d, data) + d, og_info = await update_docket_appellate_metadata(d, data) + if not d.pacer_case_id: + d.pacer_case_id = pq.pacer_case_id + + if pq.debug: + await mark_pq_successful(pq, d_id=d.pk) + return {"docket_pk": d.pk, "content_updated": False} + + if og_info is not None: + await og_info.asave() + d.originating_court_information = og_info + await d.asave() + + # Add the JSON to the docket in case we need it someday. + # xxx PacerHtmlFiles is ok for JSON? + pacer_file = await PacerHtmlFiles.objects.acreate( + content_object=d, upload_type=UPLOAD_TYPE.APPELLATE_DOCKET + ) + await sync_to_async(pacer_file.filepath.save)( + "docket.html", # We only care about the ext w/S3PrivateUUIDStorageTest + ContentFile(text.encode()), + ) + + des_returned, rds_created, content_updated = await add_docket_entries( + d, data["docket_entries"] + ) + await sync_to_async(add_parties_and_attorneys)(d, data["parties"]) + await process_orphan_documents(rds_created, pq.court_id, d.date_filed) + if content_updated: + newly_enqueued = enqueue_docket_alert(d.pk) + if newly_enqueued: + await sync_to_async(send_alert_and_webhook.delay)(d.pk, start_time) + await mark_pq_successful(pq, d_id=d.pk) + return { + "docket_pk": d.pk, + "content_updated": bool(rds_created or content_updated), + } + + async def process_recap_appellate_attachment( pk: int, ) -> Optional[Tuple[int, str, list[RECAPDocument]]]: From 1827afc85627a013a616e7c746e7ea9a41da66cc Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 2 Oct 2023 15:28:12 -0300 Subject: [PATCH 12/41] fix(recap): Added upload type noop migration. 
--- ..._alter_processingqueue_upload_type_noop.py | 65 +++++++++++++++++++ ...alter_processingqueue_upload_type_noop.sql | 10 +++ 2 files changed, 75 insertions(+) create mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py create mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py new file mode 100644 index 0000000000..d4a0d142b6 --- /dev/null +++ b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py @@ -0,0 +1,65 @@ +# Generated by Django 4.2.4 on 2023-10-02 18:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ( + "recap", + "0012_rename_fjcintegrateddatabase_district_docket_number_recap_fjcin_distric_731c7b_idx", + ), + ] + + operations = [ + migrations.AlterField( + model_name="pacerhtmlfiles", + name="upload_type", + field=models.SmallIntegerField( + choices=[ + (1, "HTML Docket"), + (2, "HTML attachment page"), + (3, "PDF"), + (4, "Docket history report"), + (5, "Appellate HTML docket"), + (6, "Appellate HTML attachment page"), + (7, "Internet Archive XML docket"), + (8, "Case report (iquery.pl) page"), + (9, "Claims register page"), + (10, "Zip archive of RECAP Documents"), + (11, "Email in the SES storage format"), + (12, "Case query page"), + (13, "Appellate Case query page"), + (14, "Case query result page"), + (15, "Appellate Case query result page"), + (16, "ACMS docket JSON object"), + ], + help_text="The type of object that is uploaded", + ), + ), + migrations.AlterField( + model_name="processingqueue", + name="upload_type", + field=models.SmallIntegerField( + choices=[ + (1, "HTML Docket"), + (2, "HTML attachment page"), + (3, "PDF"), + (4, "Docket history report"), + (5, "Appellate HTML docket"), + (6, "Appellate HTML attachment page"), + (7, "Internet Archive XML docket"), + (8, "Case report (iquery.pl) page"), + (9, "Claims register page"), + (10, "Zip archive of RECAP Documents"), + (11, "Email in the SES storage format"), + (12, "Case query page"), + (13, "Appellate Case query page"), + (14, "Case query result page"), + (15, "Appellate Case query result page"), + (16, "ACMS docket JSON object"), + ], + help_text="The type of object that is uploaded", + ), + ), + ] diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql new file mode 100644 index 0000000000..b0d9d1e378 --- /dev/null +++ b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql @@ -0,0 +1,10 @@ +BEGIN; +-- +-- Alter field upload_type on pacerhtmlfiles +-- +-- (no-op) +-- +-- Alter field upload_type on processingqueue +-- +-- (no-op) +COMMIT; From 3187aceda38cf79e6dfa930d993bff6dee8a5de8 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 3 Oct 2023 15:37:05 -0300 Subject: [PATCH 13/41] fix(recap): Updated ProcessingQueue migrations. - Fixed pacer_case_id typo in migration. 
- Increased pacer_doc_id length to 64 - Added new field acms_document_guid - Added new UploadType --- ..._alter_processingqueue_upload_type_noop.py | 65 ------------------- ...alter_processingqueue_upload_type_noop.sql | 10 --- 2 files changed, 75 deletions(-) delete mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py delete mode 100644 cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py deleted file mode 100644 index d4a0d142b6..0000000000 --- a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.py +++ /dev/null @@ -1,65 +0,0 @@ -# Generated by Django 4.2.4 on 2023-10-02 18:25 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ( - "recap", - "0012_rename_fjcintegrateddatabase_district_docket_number_recap_fjcin_distric_731c7b_idx", - ), - ] - - operations = [ - migrations.AlterField( - model_name="pacerhtmlfiles", - name="upload_type", - field=models.SmallIntegerField( - choices=[ - (1, "HTML Docket"), - (2, "HTML attachment page"), - (3, "PDF"), - (4, "Docket history report"), - (5, "Appellate HTML docket"), - (6, "Appellate HTML attachment page"), - (7, "Internet Archive XML docket"), - (8, "Case report (iquery.pl) page"), - (9, "Claims register page"), - (10, "Zip archive of RECAP Documents"), - (11, "Email in the SES storage format"), - (12, "Case query page"), - (13, "Appellate Case query page"), - (14, "Case query result page"), - (15, "Appellate Case query result page"), - (16, "ACMS docket JSON object"), - ], - help_text="The type of object that is uploaded", - ), - ), - migrations.AlterField( - model_name="processingqueue", - name="upload_type", - field=models.SmallIntegerField( - choices=[ - (1, "HTML Docket"), - (2, "HTML attachment page"), - (3, "PDF"), - (4, "Docket history report"), - (5, "Appellate HTML docket"), - (6, "Appellate HTML attachment page"), - (7, "Internet Archive XML docket"), - (8, "Case report (iquery.pl) page"), - (9, "Claims register page"), - (10, "Zip archive of RECAP Documents"), - (11, "Email in the SES storage format"), - (12, "Case query page"), - (13, "Appellate Case query page"), - (14, "Case query result page"), - (15, "Appellate Case query result page"), - (16, "ACMS docket JSON object"), - ], - help_text="The type of object that is uploaded", - ), - ), - ] diff --git a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql b/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql deleted file mode 100644 index b0d9d1e378..0000000000 --- a/cl/recap/migrations/0013_alter_processingqueue_upload_type_noop.sql +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN; --- --- Alter field upload_type on pacerhtmlfiles --- --- (no-op) --- --- Alter field upload_type on processingqueue --- --- (no-op) -COMMIT; From 627e71b2b9f8f886cd9fdf6e17500410b1f3a9d7 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 3 Oct 2023 16:25:23 -0300 Subject: [PATCH 14/41] fix(search): Fixed migration conflicts after branch update. 
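After a renumbering like this one, the migration graph can be
sanity-checked with Django's stock tooling (nothing project-specific); a
quick sketch:

    from django.core.management import call_command

    # Raises SystemExit if model changes are not covered by a migration.
    call_command("makemigrations", check=True, dry_run=True)
    # Prints the per-app graph, making the 0022 -> 0023 renumbering visible.
    call_command("showmigrations", "search")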
--- ...2_search_models_update.py => 0023_search_models_update.py} | 4 ++-- ...search_models_update.sql => 0023_search_models_update.sql} | 2 -- ..._customers.sql => 0023_search_models_update_customers.sql} | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) rename cl/search/migrations/{0022_search_models_update.py => 0023_search_models_update.py} (99%) rename cl/search/migrations/{0022_search_models_update.sql => 0023_search_models_update.sql} (99%) rename cl/search/migrations/{0022_search_models_update _customers.sql => 0023_search_models_update_customers.sql} (99%) diff --git a/cl/search/migrations/0022_search_models_update.py b/cl/search/migrations/0023_search_models_update.py similarity index 99% rename from cl/search/migrations/0022_search_models_update.py rename to cl/search/migrations/0023_search_models_update.py index 26e647e201..9bd25065ef 100644 --- a/cl/search/migrations/0022_search_models_update.py +++ b/cl/search/migrations/0023_search_models_update.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.4 on 2023-10-03 18:41 +# Generated by Django 4.2.4 on 2023-10-03 19:19 import cl.lib.fields from django.db import migrations, models @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0021_add_pghistory_courthouse"), + ("search", "0022_alter_courthouse_court_noop"), ] operations = [ diff --git a/cl/search/migrations/0022_search_models_update.sql b/cl/search/migrations/0023_search_models_update.sql similarity index 99% rename from cl/search/migrations/0022_search_models_update.sql rename to cl/search/migrations/0023_search_models_update.sql index 1fb83b9e4c..f5fa066ad6 100644 --- a/cl/search/migrations/0022_search_models_update.sql +++ b/cl/search/migrations/0023_search_models_update.sql @@ -1,5 +1,4 @@ BEGIN; - -- -- Add field acms_document_guid to claimhistory -- @@ -56,5 +55,4 @@ CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapd -- Alter field pacer_doc_id on recapdocumentevent -- ALTER TABLE "search_recapdocumentevent" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); - COMMIT; diff --git a/cl/search/migrations/0022_search_models_update _customers.sql b/cl/search/migrations/0023_search_models_update_customers.sql similarity index 99% rename from cl/search/migrations/0022_search_models_update _customers.sql rename to cl/search/migrations/0023_search_models_update_customers.sql index c99cfcaf77..2ffd03847b 100644 --- a/cl/search/migrations/0022_search_models_update _customers.sql +++ b/cl/search/migrations/0023_search_models_update_customers.sql @@ -1,5 +1,4 @@ BEGIN; - -- -- Add field acms_document_guid to claimhistory -- @@ -30,5 +29,4 @@ CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf_like" ON "search_claimhi ALTER TABLE "search_recapdocument" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9" ON "search_recapdocument" ("pacer_doc_id"); CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapdocument" ("pacer_doc_id" varchar_pattern_ops); - COMMIT; From 8cea0f8453bfbbc2e6fbc816fd748a973ca2be47 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 3 Oct 2023 18:13:24 -0300 Subject: [PATCH 15/41] fix(search): Added acms_document_guid only for RECAPDocument. 
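With the field and its index on RECAPDocument, ACMS documents can be
looked up by GUID directly; a usage sketch (the GUID below is a made-up
sample, not a real document):

    from cl.search.models import RECAPDocument

    rd = RECAPDocument.objects.filter(
        acms_document_guid="34cacf7f-52d5-4d1f-b4f0-0542b429f674"
    ).first()  # can be served by the index this migration adds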
--- .../migrations/0023_search_models_update.py | 64 +++---------------- .../migrations/0023_search_models_update.sql | 15 ++--- .../0023_search_models_update_customers.sql | 10 +-- cl/search/models.py | 14 ++-- 4 files changed, 24 insertions(+), 79 deletions(-) diff --git a/cl/search/migrations/0023_search_models_update.py b/cl/search/migrations/0023_search_models_update.py index 9bd25065ef..19dd3140d1 100644 --- a/cl/search/migrations/0023_search_models_update.py +++ b/cl/search/migrations/0023_search_models_update.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.4 on 2023-10-03 19:19 +# Generated by Django 4.2.4 on 2023-10-03 21:00 import cl.lib.fields from django.db import migrations, models @@ -12,14 +12,6 @@ class Migration(migrations.Migration): ] operations = [ - pgtrigger.migrations.RemoveTrigger( - model_name="claimhistory", - name="update_or_delete_snapshot_delete", - ), - pgtrigger.migrations.RemoveTrigger( - model_name="claimhistory", - name="update_or_delete_snapshot_update", - ), pgtrigger.migrations.RemoveTrigger( model_name="recapdocument", name="update_or_delete_snapshot_delete", @@ -28,24 +20,6 @@ class Migration(migrations.Migration): model_name="recapdocument", name="update_or_delete_snapshot_update", ), - migrations.AddField( - model_name="claimhistory", - name="acms_document_guid", - field=models.CharField( - blank=True, - help_text="The ID of the document in PACER.", - max_length=64, - ), - ), - migrations.AddField( - model_name="claimhistoryevent", - name="acms_document_guid", - field=models.CharField( - blank=True, - help_text="The ID of the document in PACER.", - max_length=64, - ), - ), migrations.AddField( model_name="recapdocument", name="acms_document_guid", @@ -141,33 +115,11 @@ class Migration(migrations.Migration): max_length=64, ), ), - pgtrigger.migrations.AddTrigger( - model_name="claimhistory", - trigger=pgtrigger.compiler.Trigger( - name="update_or_delete_snapshot_update", - sql=pgtrigger.compiler.UpsertTriggerSql( - condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."file_size" IS DISTINCT FROM (NEW."file_size") OR OLD."filepath_local" IS DISTINCT FROM (NEW."filepath_local") OR OLD."filepath_ia" IS DISTINCT FROM (NEW."filepath_ia") OR OLD."ia_upload_failure_count" IS DISTINCT FROM (NEW."ia_upload_failure_count") OR OLD."thumbnail" IS DISTINCT FROM (NEW."thumbnail") OR OLD."thumbnail_status" IS DISTINCT FROM (NEW."thumbnail_status") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."ocr_status" IS DISTINCT FROM (NEW."ocr_status") OR OLD."date_upload" IS DISTINCT FROM (NEW."date_upload") OR OLD."document_number" IS DISTINCT FROM (NEW."document_number") OR OLD."attachment_number" IS DISTINCT FROM (NEW."attachment_number") OR OLD."pacer_doc_id" IS DISTINCT FROM (NEW."pacer_doc_id") OR OLD."acms_document_guid" IS DISTINCT FROM (NEW."acms_document_guid") OR OLD."is_available" IS DISTINCT FROM (NEW."is_available") OR OLD."is_free_on_pacer" IS DISTINCT FROM (NEW."is_free_on_pacer") OR OLD."is_sealed" IS DISTINCT FROM (NEW."is_sealed") OR OLD."claim_id" IS DISTINCT FROM (NEW."claim_id") OR OLD."date_filed" IS DISTINCT FROM (NEW."date_filed") OR OLD."claim_document_type" IS DISTINCT FROM (NEW."claim_document_type") OR OLD."description" IS DISTINCT FROM (NEW."description") OR OLD."claim_doc_id" IS DISTINCT FROM (NEW."claim_doc_id") OR OLD."pacer_dm_id" IS DISTINCT FROM 
(NEW."pacer_dm_id") OR OLD."pacer_case_id" IS DISTINCT FROM (NEW."pacer_case_id"))', - func='INSERT INTO "search_claimhistoryevent" ("acms_document_guid", "attachment_number", "claim_doc_id", "claim_document_type", "claim_id", "date_created", "date_filed", "date_modified", "date_upload", "description", "document_number", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", "is_free_on_pacer", "is_sealed", "ocr_status", "pacer_case_id", "pacer_dm_id", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."claim_doc_id", OLD."claim_document_type", OLD."claim_id", OLD."date_created", OLD."date_filed", OLD."date_modified", OLD."date_upload", OLD."description", OLD."document_number", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_case_id", OLD."pacer_dm_id", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', - hash="5723c9a392630408dbd5fbc404478e5bb2743d68", - operation="UPDATE", - pgid="pgtrigger_update_or_delete_snapshot_update_137a5", - table="search_claimhistory", - when="AFTER", - ), - ), - ), - pgtrigger.migrations.AddTrigger( - model_name="claimhistory", - trigger=pgtrigger.compiler.Trigger( - name="update_or_delete_snapshot_delete", - sql=pgtrigger.compiler.UpsertTriggerSql( - func='INSERT INTO "search_claimhistoryevent" ("acms_document_guid", "attachment_number", "claim_doc_id", "claim_document_type", "claim_id", "date_created", "date_filed", "date_modified", "date_upload", "description", "document_number", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", "is_free_on_pacer", "is_sealed", "ocr_status", "pacer_case_id", "pacer_dm_id", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."claim_doc_id", OLD."claim_document_type", OLD."claim_id", OLD."date_created", OLD."date_filed", OLD."date_modified", OLD."date_upload", OLD."description", OLD."document_number", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_case_id", OLD."pacer_dm_id", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', - hash="e9a4793fa17211e41a75f2e0925ce7dbc11608ab", - operation="DELETE", - pgid="pgtrigger_update_or_delete_snapshot_delete_5ec04", - table="search_claimhistory", - when="AFTER", - ), + migrations.AddIndex( + model_name="recapdocument", + index=models.Index( + fields=["acms_document_guid"], + name="search_reca_acms_do_17c11f_idx", ), ), pgtrigger.migrations.AddTrigger( @@ -175,9 +127,9 @@ class Migration(migrations.Migration): trigger=pgtrigger.compiler.Trigger( name="update_or_delete_snapshot_update", sql=pgtrigger.compiler.UpsertTriggerSql( - condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM 
(NEW."date_created") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."file_size" IS DISTINCT FROM (NEW."file_size") OR OLD."filepath_local" IS DISTINCT FROM (NEW."filepath_local") OR OLD."filepath_ia" IS DISTINCT FROM (NEW."filepath_ia") OR OLD."ia_upload_failure_count" IS DISTINCT FROM (NEW."ia_upload_failure_count") OR OLD."thumbnail" IS DISTINCT FROM (NEW."thumbnail") OR OLD."thumbnail_status" IS DISTINCT FROM (NEW."thumbnail_status") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."ocr_status" IS DISTINCT FROM (NEW."ocr_status") OR OLD."date_upload" IS DISTINCT FROM (NEW."date_upload") OR OLD."document_number" IS DISTINCT FROM (NEW."document_number") OR OLD."attachment_number" IS DISTINCT FROM (NEW."attachment_number") OR OLD."pacer_doc_id" IS DISTINCT FROM (NEW."pacer_doc_id") OR OLD."acms_document_guid" IS DISTINCT FROM (NEW."acms_document_guid") OR OLD."is_available" IS DISTINCT FROM (NEW."is_available") OR OLD."is_free_on_pacer" IS DISTINCT FROM (NEW."is_free_on_pacer") OR OLD."is_sealed" IS DISTINCT FROM (NEW."is_sealed") OR OLD."docket_entry_id" IS DISTINCT FROM (NEW."docket_entry_id") OR OLD."document_type" IS DISTINCT FROM (NEW."document_type") OR OLD."description" IS DISTINCT FROM (NEW."description"))', + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."file_size" IS DISTINCT FROM (NEW."file_size") OR OLD."filepath_local" IS DISTINCT FROM (NEW."filepath_local") OR OLD."filepath_ia" IS DISTINCT FROM (NEW."filepath_ia") OR OLD."ia_upload_failure_count" IS DISTINCT FROM (NEW."ia_upload_failure_count") OR OLD."thumbnail" IS DISTINCT FROM (NEW."thumbnail") OR OLD."thumbnail_status" IS DISTINCT FROM (NEW."thumbnail_status") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."ocr_status" IS DISTINCT FROM (NEW."ocr_status") OR OLD."date_upload" IS DISTINCT FROM (NEW."date_upload") OR OLD."document_number" IS DISTINCT FROM (NEW."document_number") OR OLD."attachment_number" IS DISTINCT FROM (NEW."attachment_number") OR OLD."pacer_doc_id" IS DISTINCT FROM (NEW."pacer_doc_id") OR OLD."is_available" IS DISTINCT FROM (NEW."is_available") OR OLD."is_free_on_pacer" IS DISTINCT FROM (NEW."is_free_on_pacer") OR OLD."is_sealed" IS DISTINCT FROM (NEW."is_sealed") OR OLD."docket_entry_id" IS DISTINCT FROM (NEW."docket_entry_id") OR OLD."document_type" IS DISTINCT FROM (NEW."document_type") OR OLD."description" IS DISTINCT FROM (NEW."description") OR OLD."acms_document_guid" IS DISTINCT FROM (NEW."acms_document_guid"))', func='INSERT INTO "search_recapdocumentevent" ("acms_document_guid", "attachment_number", "date_created", "date_modified", "date_upload", "description", "docket_entry_id", "document_number", "document_type", "file_size", "filepath_ia", "filepath_local", "ia_upload_failure_count", "id", "is_available", "is_free_on_pacer", "is_sealed", "ocr_status", "pacer_doc_id", "page_count", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "thumbnail", "thumbnail_status") VALUES (OLD."acms_document_guid", OLD."attachment_number", OLD."date_created", OLD."date_modified", OLD."date_upload", OLD."description", OLD."docket_entry_id", OLD."document_number", OLD."document_type", OLD."file_size", OLD."filepath_ia", OLD."filepath_local", OLD."ia_upload_failure_count", OLD."id", OLD."is_available", 
OLD."is_free_on_pacer", OLD."is_sealed", OLD."ocr_status", OLD."pacer_doc_id", OLD."page_count", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."thumbnail", OLD."thumbnail_status"); RETURN NULL;', - hash="8c76be0c2027b4cdd0d68099137b26d0c0dc9add", + hash="e304ad88bf00f8261bfef673a37c9f8320d0522d", operation="UPDATE", pgid="pgtrigger_update_or_delete_snapshot_update_8a108", table="search_recapdocument", diff --git a/cl/search/migrations/0023_search_models_update.sql b/cl/search/migrations/0023_search_models_update.sql index f5fa066ad6..97dd102ffa 100644 --- a/cl/search/migrations/0023_search_models_update.sql +++ b/cl/search/migrations/0023_search_models_update.sql @@ -1,15 +1,5 @@ BEGIN; -- --- Add field acms_document_guid to claimhistory --- -ALTER TABLE "search_claimhistory" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; -ALTER TABLE "search_claimhistory" ALTER COLUMN "acms_document_guid" DROP DEFAULT; --- --- Add field acms_document_guid to claimhistoryevent --- -ALTER TABLE "search_claimhistoryevent" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; -ALTER TABLE "search_claimhistoryevent" ALTER COLUMN "acms_document_guid" DROP DEFAULT; --- -- Add field acms_document_guid to recapdocument -- ALTER TABLE "search_recapdocument" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; @@ -55,4 +45,9 @@ CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapd -- Alter field pacer_doc_id on recapdocumentevent -- ALTER TABLE "search_recapdocumentevent" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); +-- +-- Create index search_reca_acms_do_17c11f_idx on field(s) acms_document_guid of model recapdocument +-- +CREATE INDEX "search_reca_acms_do_17c11f_idx" ON "search_recapdocument" ("acms_document_guid"); + COMMIT; diff --git a/cl/search/migrations/0023_search_models_update_customers.sql b/cl/search/migrations/0023_search_models_update_customers.sql index 2ffd03847b..0a580272f5 100644 --- a/cl/search/migrations/0023_search_models_update_customers.sql +++ b/cl/search/migrations/0023_search_models_update_customers.sql @@ -1,10 +1,5 @@ BEGIN; -- --- Add field acms_document_guid to claimhistory --- -ALTER TABLE "search_claimhistory" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; -ALTER TABLE "search_claimhistory" ALTER COLUMN "acms_document_guid" DROP DEFAULT; --- -- Add field acms_document_guid to recapdocument -- ALTER TABLE "search_recapdocument" ADD COLUMN "acms_document_guid" varchar(64) DEFAULT '' NOT NULL; @@ -29,4 +24,9 @@ CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf_like" ON "search_claimhi ALTER TABLE "search_recapdocument" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9" ON "search_recapdocument" ("pacer_doc_id"); CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapdocument" ("pacer_doc_id" varchar_pattern_ops); +-- +-- Create index search_reca_acms_do_17c11f_idx on field(s) acms_document_guid of model recapdocument +-- +CREATE INDEX "search_reca_acms_do_17c11f_idx" ON "search_recapdocument" ("acms_document_guid"); + COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index a4a7b06ce7..75d8174ff8 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -1349,11 +1349,6 @@ class AbstractPacerDocument(models.Model): blank=True, db_index=True, ) - acms_document_guid = models.CharField( - help_text="The ID of the document in PACER.", - 
max_length=64, - blank=True, - ) is_available = models.BooleanField( help_text="True if the item is available in RECAP", blank=True, @@ -1372,9 +1367,6 @@ class AbstractPacerDocument(models.Model): class Meta: abstract = True - indexes = [ - models.Index(fields=["acms_document_guid"]), - ] @pghistory.track(AfterUpdateOrDeleteSnapshot()) @@ -1414,6 +1406,11 @@ class RECAPDocument(AbstractPacerDocument, AbstractPDF, AbstractDateTimeModel): ), blank=True, ) + acms_document_guid = models.CharField( + help_text="The ID of the document in PACER.", + max_length=64, + blank=True, + ) es_rd_field_tracker = FieldTracker( fields=[ @@ -1449,6 +1446,7 @@ class Meta: fields=["filepath_local"], name="search_recapdocument_filepath_local_7dc6b0e53ccf753_uniq", ), + models.Index(fields=["acms_document_guid"]), ] permissions = (("has_recap_api_access", "Can work with RECAP API"),) From f2e8f2c50a6da476a3455de712a9f8db728fbdbd Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 3 Oct 2023 18:23:20 -0300 Subject: [PATCH 16/41] fix(search): Fixed acms_document_guid help text --- cl/search/migrations/0023_search_models_update.py | 6 +++--- cl/search/models.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cl/search/migrations/0023_search_models_update.py b/cl/search/migrations/0023_search_models_update.py index 19dd3140d1..f857df1d7f 100644 --- a/cl/search/migrations/0023_search_models_update.py +++ b/cl/search/migrations/0023_search_models_update.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.4 on 2023-10-03 21:00 +# Generated by Django 4.2.4 on 2023-10-03 21:21 import cl.lib.fields from django.db import migrations, models @@ -25,7 +25,7 @@ class Migration(migrations.Migration): name="acms_document_guid", field=models.CharField( blank=True, - help_text="The ID of the document in PACER.", + help_text="The GUID of the document in ACMS.", max_length=64, ), ), @@ -34,7 +34,7 @@ class Migration(migrations.Migration): name="acms_document_guid", field=models.CharField( blank=True, - help_text="The ID of the document in PACER.", + help_text="The GUID of the document in ACMS.", max_length=64, ), ), diff --git a/cl/search/models.py b/cl/search/models.py index 75d8174ff8..b9909786a5 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -1407,7 +1407,7 @@ class RECAPDocument(AbstractPacerDocument, AbstractPDF, AbstractDateTimeModel): blank=True, ) acms_document_guid = models.CharField( - help_text="The ID of the document in PACER.", + help_text="The GUID of the document in ACMS.", max_length=64, blank=True, ) From 2bb3bf1bfb5bed5be48425e3e1fa1fceb53bc555 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 4 Oct 2023 13:51:47 -0400 Subject: [PATCH 17/41] recap/tests.py: ACMS `pacer_case_id` test is bogus Oops, I'm not quite sure how I only wrote this halfway on Monday. We want this test to succeed, not to fail, and to return HTTP_201_CREATED, not HTTP_400_BAD_REQUEST.
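For background, the validation the corrected test exercises boils down to dash counting, roughly like this (a sketch only; is_acms_case_id is a hypothetical name, not a helper introduced by this series):

    # ACMS case IDs are GUIDs, e.g. "34cacf7f-52d5-4d1f-b4f0-0542b429f674",
    # so they contain several dashes. A single dash suggests a docket number
    # like "23-6364" pasted into the wrong field, which the serializers reject.
    def is_acms_case_id(pacer_case_id: str) -> bool:
        return pacer_case_id.count("-") > 1

    assert is_acms_case_id("34cacf7f-52d5-4d1f-b4f0-0542b429f674")
    assert not is_acms_case_id("23-6364")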
--- cl/recap/tests.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cl/recap/tests.py b/cl/recap/tests.py index 7463c15a9d..ad69b53c33 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -610,7 +610,7 @@ def test_recap_upload_validate_pacer_case_id(self, mock): ) def test_recap_upload_validate_acms_pacer_case_id(self, mock): - """Can we properly validate the pacer_case_id doesn't contain a dash -?""" + """Can we properly validate a pacer_case_id that is a GUID?""" self.data.update( { "upload_type": UPLOAD_TYPE.ACMS_DOCKET_JSON, @@ -621,11 +621,8 @@ def test_recap_upload_validate_acms_pacer_case_id(self, mock): del self.data["pacer_doc_id"] r = self.client.post(self.path, self.data) j = json.loads(r.content) - self.assertEqual(r.status_code, HTTP_400_BAD_REQUEST) - self.assertIn( - "PACER case ID can not contain a single (-); that looks like a docket number.", - j["non_field_errors"][0], - ) + + self.assertEqual(r.status_code, HTTP_201_CREATED) @mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport) From 5937d105a8eece2c49067f9ec4b60e03c9adaef6 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 4 Oct 2023 14:15:41 -0400 Subject: [PATCH 18/41] recap/api_serializers.py: Relax another dash constraint Turns out there were two copies of the `pacer_case_id` dash-counting constraint: one in ProcessingQueueSerializer(), dealt with in 4d5999db754c917b38b20fa8d1b8aefd09e072ad, and one in PacerFetchQueueSerializer(), which is fixed herein. Oops. Also, should these tests really be duplicated like that? --- cl/recap/api_serializers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cl/recap/api_serializers.py b/cl/recap/api_serializers.py index ee1423a3e4..fa5672d87d 100644 --- a/cl/recap/api_serializers.py +++ b/cl/recap/api_serializers.py @@ -290,8 +290,13 @@ def validate(self, attrs): "without 'court' parameter." ) - if attrs.get("pacer_case_id") and "-" in attrs.get("pacer_case_id"): - raise ValidationError("PACER case ID can not contains dashes -") + if attrs.get("pacer_case_id"): + dashes = attrs.get("pacer_case_id").count("-") + if dashes == 1: + raise ValidationError( + "PACER case ID can not contain a single (-); " + "that looks like a docket number." + ) if attrs.get("docket_number") and not attrs.get("court"): # If a docket_number is included, is a court also? From 4c06b1bb91fc0446ce89a5152b562c11a3bebb6b Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Wed, 4 Oct 2023 14:28:58 -0400 Subject: [PATCH 19/41] recap/tests.py: Missing "that " This better be the last of these. --- cl/recap/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cl/recap/tests.py b/cl/recap/tests.py index ad69b53c33..19f0f1120b 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -775,7 +775,7 @@ def test_recap_fetch_validate_pacer_case_id(self, mock): serialized_fq.is_valid() self.assertIn( serialized_fq.errors["non_field_errors"][0], - "PACER case ID can not contain a single (-); looks like a docket number.", + "PACER case ID can not contain a single (-); that looks like a docket number.", ) def test_key_serialization_with_client_code(self, mock) -> None: From 968868b4fd819ad449233f99d8f1bb245ced6209 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 5 Oct 2023 11:20:04 -0300 Subject: [PATCH 20/41] fix(recap): Tweaked process_recap_acms_docket to use parse_acms_json. - Also added acms docket processing test.
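For reviewers, the new test drives the task end to end with the report class stubbed out, roughly like this (condensed from the test added below; the names match the diff):

    # MockACMSDocketReport._parse_text() ignores the uploaded JSON and
    # fills .data from DocketDataFactory, so no real ACMS fixture is needed.
    with mock.patch("cl.recap.tasks.ACMSDocketReport", MockACMSDocketReport):
        async_to_sync(process_recap_acms_docket)(pq.pk)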
--- cl/recap/factories.py | 1 + cl/recap/tasks.py | 4 ++-- cl/recap/tests.py | 39 ++++++++++++++++++++++++++++++++++++++- cl/tests/utils.py | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 3 deletions(-) diff --git a/cl/recap/factories.py b/cl/recap/factories.py index 2de099a100..a2d40d0ffb 100644 --- a/cl/recap/factories.py +++ b/cl/recap/factories.py @@ -115,4 +115,5 @@ class DocketDataFactory(DictFactory): case_name = Faker("case_name") docket_entries = List([SubFactory(MinuteDocketEntryDataFactory)]) docket_number = Faker("federal_district_docket_number") + date_filed = Faker("date_object") ordered_by = "date_filed" diff --git a/cl/recap/tasks.py b/cl/recap/tasks.py index 8036782752..b8aa5904ca 100644 --- a/cl/recap/tasks.py +++ b/cl/recap/tasks.py @@ -1212,12 +1212,12 @@ async def process_recap_acms_docket(pk): if process.current_process().daemon: # yyy - data = parse_appellate_text(map_cl_to_pacer_id(pq.court_id), text) + data = parse_acms_json(map_cl_to_pacer_id(pq.court_id), text) else: with concurrent.futures.ProcessPoolExecutor() as pool: data = await asyncio.get_running_loop().run_in_executor( pool, - parse_appellate_text, + parse_acms_json, map_cl_to_pacer_id(pq.court_id), text, ) diff --git a/cl/recap/tests.py b/cl/recap/tests.py index 19f0f1120b..0784541c53 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -102,6 +102,7 @@ do_pacer_fetch, fetch_pacer_doc_by_rd, get_and_copy_recap_attachment_docs, + process_recap_acms_docket, process_recap_appellate_attachment, process_recap_appellate_docket, process_recap_attachment, @@ -127,7 +128,7 @@ ) from cl.tests import fakes from cl.tests.cases import SimpleTestCase, TestCase -from cl.tests.utils import MockResponse +from cl.tests.utils import MockACMSDocketReport, MockResponse from cl.users.factories import ( UserProfileWithParentsFactory, UserWithChildProfileFactory, @@ -624,6 +625,42 @@ def test_recap_upload_validate_acms_pacer_case_id(self, mock): self.assertEqual(r.status_code, HTTP_201_CREATED) + def test_processing_an_acms_docket(self, mock_upload): + """Can we process an ACMS docket report? + + Note that this works fine even though we're not actually uploading a + docket due to the mock. + """ + + pq = ProcessingQueue.objects.create( + court=self.court_appellate, + uploader=self.user, + pacer_case_id="34cacf7f-52d5-4d1f-b4f0-0542b429f674", + upload_type=UPLOAD_TYPE.ACMS_DOCKET_JSON, + filepath_local=self.f, + ) + with mock.patch( + "cl.recap.tasks.ACMSDocketReport", MockACMSDocketReport + ): + # Process the ACMS docket report. + async_to_sync(process_recap_acms_docket)(pq.pk) + + docket = Docket.objects.get( + pacer_case_id="34cacf7f-52d5-4d1f-b4f0-0542b429f674" + ) + docket_entries = DocketEntry.objects.filter(docket=docket).order_by( + "date_created" + ) + + # Confirm Docket entry and RECAPDocument is properly created. 
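+        # The expected counts mirror the single-entry MockACMSDocketReport fixture in cl/tests/utils.py.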
+ self.assertEqual(docket_entries.count(), 1) + recap_documents = RECAPDocument.objects.all().order_by("date_created") + self.assertEqual(recap_documents.count(), 1) + self.assertEqual( + recap_documents[0].pacer_doc_id, + "bde556a7-bdde-ed11-a7c6-001dd806a1fd", + ) + @mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport) @mock.patch( diff --git a/cl/tests/utils.py b/cl/tests/utils.py index 8a5177ec82..a1f0aa7c06 100644 --- a/cl/tests/utils.py +++ b/cl/tests/utils.py @@ -9,6 +9,8 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait +from cl.recap.factories import DocketDataFactory, DocketEntryDataFactory + def make_client(user_pk: int) -> APIClient: user = User.objects.get(pk=user_pk) @@ -57,3 +59,36 @@ def __init__( if mock_raw is True: file_stream = ContentFile("OK") self.raw = file_stream + + +class MockACMSDocketReport: + def __init__(self, court_id): + pass + + def _parse_text(self, json): + acms_data = DocketDataFactory( + court_id="ca9", + docket_number="23-6364", + parties=[ + { + "attorneys": [ + { + "contact": "Dawson Parrish, PC\n309 W. 7th St.\nSte 915\nFt. Worth, TX 76102\n817-870-1212\nEmail: hparrish@dawsonparrish.com\n", + "name": "John Hunter Parrish", + "roles": ["ATTORNEY TO BE NOTICED"], + } + ], + "date_terminated": None, + "extra_info": "", + "name": "RFC Drilling, LLC", + "type": "Plaintiff", + }, + ], + docket_entries=[ + DocketEntryDataFactory( + pacer_doc_id="bde556a7-bdde-ed11-a7c6-001dd806a1fd", + document_number=1, + ) + ], + ) + self.data = acms_data From 5ac3a820aa3310904cfaa22f84b7511161efc7b4 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 17 Oct 2023 13:12:54 -0300 Subject: [PATCH 21/41] fix(recap): Avoid converting docket entries naive date_filed --- cl/lib/timezone_helpers.py | 10 ++++++- cl/recap/tests.py | 29 ++++++++++++++------ cl/tests/utils.py | 55 +++++++++++++++++++++++++++++++------- 3 files changed, 75 insertions(+), 19 deletions(-) diff --git a/cl/lib/timezone_helpers.py b/cl/lib/timezone_helpers.py index a26feb44af..81d6772943 100644 --- a/cl/lib/timezone_helpers.py +++ b/cl/lib/timezone_helpers.py @@ -1,6 +1,7 @@ from datetime import date, datetime, time import pytz +from django.utils.timezone import is_naive from cl.recap.constants import COURT_TIMEZONES @@ -31,7 +32,14 @@ def localize_date_and_time( :return: A tuple of date_filed and time_filed or None if no time available. 
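    An illustrative doctest-style sketch (hypothetical, mirroring the ca2 test
    below: a naive time is kept as-is rather than shifted):

        >>> localize_date_and_time("ca2", datetime(2023, 10, 2, 11, 17))
        (datetime.date(2023, 10, 2), datetime.time(11, 17))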
""" if isinstance(date_filed, datetime): - datetime_filed_local = convert_to_court_timezone(court_id, date_filed) + if is_naive(date_filed): + datetime_filed_local = localize_naive_datetime_to_court_timezone( + court_id, date_filed + ) + else: + datetime_filed_local = convert_to_court_timezone( + court_id, date_filed + ) time_filed = datetime_filed_local.time() date_filed = datetime_filed_local.date() return date_filed, time_filed diff --git a/cl/recap/tests.py b/cl/recap/tests.py index 0784541c53..2775e8401d 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -1,7 +1,7 @@ import json import os from copy import deepcopy -from datetime import date, datetime, timedelta, timezone +from datetime import date, datetime, time, timedelta, timezone from pathlib import Path from unittest import mock from unittest.mock import ANY @@ -146,7 +146,7 @@ def setUpTestData(cls): cls.court_appellate = CourtFactory( id="ca9", jurisdiction="F", in_use=True ) - + cls.ca2 = CourtFactory(id="ca2", jurisdiction="F", in_use=True) cls.att_data = AppellateAttachmentPageFactory( attachments=[ AppellateAttachmentFactory( @@ -633,9 +633,9 @@ def test_processing_an_acms_docket(self, mock_upload): """ pq = ProcessingQueue.objects.create( - court=self.court_appellate, + court=self.ca2, uploader=self.user, - pacer_case_id="34cacf7f-52d5-4d1f-b4f0-0542b429f674", + pacer_case_id="9f5ae37f-c44e-4194-b075-3f8f028559c4", upload_type=UPLOAD_TYPE.ACMS_DOCKET_JSON, filepath_local=self.f, ) @@ -646,20 +646,33 @@ def test_processing_an_acms_docket(self, mock_upload): async_to_sync(process_recap_acms_docket)(pq.pk) docket = Docket.objects.get( - pacer_case_id="34cacf7f-52d5-4d1f-b4f0-0542b429f674" + pacer_case_id="9f5ae37f-c44e-4194-b075-3f8f028559c4" ) docket_entries = DocketEntry.objects.filter(docket=docket).order_by( "date_created" ) # Confirm Docket entry and RECAPDocument is properly created. - self.assertEqual(docket_entries.count(), 1) + self.assertEqual(docket_entries.count(), 2) recap_documents = RECAPDocument.objects.all().order_by("date_created") - self.assertEqual(recap_documents.count(), 1) + self.assertEqual(recap_documents.count(), 2) self.assertEqual( recap_documents[0].pacer_doc_id, - "bde556a7-bdde-ed11-a7c6-001dd806a1fd", + "46de54cd-3561-ee11-be6e-001dd804e087", ) + self.assertEqual( + recap_documents[1].pacer_doc_id, + "0d24550b-3761-ee11-be6e-001dd804e087", + ) + + # Confirm the naive date_filed is not converted. + de_1 = DocketEntry.objects.get(docket__court=self.ca2, entry_number=1) + self.assertEqual(de_1.date_filed, date(2023, 10, 2)) + self.assertEqual(de_1.time_filed, time(11, 17, 0)) + + de_2 = DocketEntry.objects.get(docket__court=self.ca2, entry_number=2) + self.assertEqual(de_2.date_filed, date(2023, 10, 2)) + self.assertEqual(de_2.time_filed, time(11, 20, 0)) @mock.patch("cl.recap.tasks.DocketReport", new=fakes.FakeDocketReport) diff --git a/cl/tests/utils.py b/cl/tests/utils.py index a1f0aa7c06..888c912f1a 100644 --- a/cl/tests/utils.py +++ b/cl/tests/utils.py @@ -1,3 +1,4 @@ +from datetime import date, datetime from typing import Tuple from django.contrib.auth.models import User @@ -67,28 +68,62 @@ def __init__(self, court_id): def _parse_text(self, json): acms_data = DocketDataFactory( - court_id="ca9", + court_id="ca2", + appeal_from="S.D.N.Y . (NEW YORK CITY)", + case_name="United States of America v. 
Raji", + case_type_information="Criminal, Direct Criminal", + date_filed=date(2023, 10, 2), docket_number="23-6364", + fee_status="IFP Granted", + originating_court_information={ + "identifier": "S.D.N.Y. (NEW YORK CITY)", + "name": "S.D.N.Y . (NEW YORK CITY)", + }, + pacer_case_id="9f5ae37f-c44e-4194-b075-3f8f028559c4", parties=[ { "attorneys": [ { - "contact": "Dawson Parrish, PC\n309 W. 7th St.\nSte 915\nFt. Worth, TX 76102\n817-870-1212\nEmail: hparrish@dawsonparrish.com\n", - "name": "John Hunter Parrish", - "roles": ["ATTORNEY TO BE NOTICED"], + "contact": "Email: won.shin@usdoj.gov\nUnited States Attorney's Office for the Southern District of New York\nOne Saint Andrew's Plaza\nNew York, NY 10007", + "name": "Won S. Shin, Assistant U.S. Attorney", + "roles": ["US Attorney"], } ], - "date_terminated": None, - "extra_info": "", - "name": "RFC Drilling, LLC", - "type": "Plaintiff", + "name": "UNITED STATES OF AMERICA", + "type": "AppelleeUSA", + }, + { + "attorneys": [ + { + "contact": "Direct: 212-571-5500\nEmail: jschneider@rssslaaw.com\nRothman, Schneider, Soloway & Stern, LLP\n100 Lafayette Street\nSuite 501\nNew York, NY 10013", + "name": "Jeremy Schneider, -", + "roles": ["CJA Appointment"], + } + ], + "name": "MUSTAPHA RAJI", + "type": "Appellant", + "unparsed": [ + "\u00a0\u00a0\u00a0\u00a0AKA Sealed Defendant 1, " + ], }, ], docket_entries=[ DocketEntryDataFactory( - pacer_doc_id="bde556a7-bdde-ed11-a7c6-001dd806a1fd", + date_filed=datetime(2023, 10, 2, 11, 17, 0), + date_entered=datetime(2023, 10, 2, 11, 17, 0), + description="
NOTICE OF CRIMINAL APPEAL, with district court docket, on behalf of Appellant Mustapha Raji, FILED. [Entered: 10/02/2023 11:17 AM]
", + pacer_doc_id="46de54cd-3561-ee11-be6e-001dd804e087", document_number=1, - ) + page_count=18, + ), + DocketEntryDataFactory( + date_filed=datetime(2023, 10, 2, 11, 20, 0), + date_entered=datetime(2023, 10, 2, 11, 20, 0), + description="
DISTRICT COURT JUDGMENT, dated 09/19/2023, RECEIVED. [Entered: 10/02/2023 11:20 AM]
", + pacer_doc_id="0d24550b-3761-ee11-be6e-001dd804e087", + document_number=2, + page_count=8, + ), ], ) self.data = acms_data From 5f026c57010ba652f45bf9a715ecf6662b3e3094 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Tue, 17 Oct 2023 15:36:25 -0400 Subject: [PATCH 22/41] timezone_helpers.py: Explain localize_date_and_time() conversions Clearly explain the naive/aware datetime handling. --- cl/lib/timezone_helpers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cl/lib/timezone_helpers.py b/cl/lib/timezone_helpers.py index 81d6772943..b7f9175927 100644 --- a/cl/lib/timezone_helpers.py +++ b/cl/lib/timezone_helpers.py @@ -27,6 +27,12 @@ def localize_date_and_time( """Localize the date and time into local court timezone, split it into date and time. + If given a "naive datetime" (no timezone information), assume the + time is in the court's local time. and return timezone-aware date + and time objects. If given a timezone-aware datetime, convert it + to the court's local timezone and return timezone-aware date and + time objects. + :param court_id: The court_id to get the timezone from. :param date_filed: The date or datetime instance provided by the source. :return: A tuple of date_filed and time_filed or None if no time available. From e2df2e868b8d62769e8e5d9fbc47670a6b73fa0d Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 17 Oct 2023 20:08:44 -0300 Subject: [PATCH 23/41] fix(recap): Update mock data [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cl/tests/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cl/tests/utils.py b/cl/tests/utils.py index 888c912f1a..2589ea8157 100644 --- a/cl/tests/utils.py +++ b/cl/tests/utils.py @@ -69,15 +69,15 @@ def __init__(self, court_id): def _parse_text(self, json): acms_data = DocketDataFactory( court_id="ca2", - appeal_from="S.D.N.Y . (NEW YORK CITY)", - case_name="United States of America v. Raji", - case_type_information="Criminal, Direct Criminal", + appeal_from="Department of Justice", + case_name="Ascent Pharmaceuticals, Inc. v. United States Drug Enforcement Administration", + case_type_information="Agency, Non-Immigration Petition for Review", date_filed=date(2023, 10, 2), docket_number="23-6364", fee_status="IFP Granted", originating_court_information={ - "identifier": "S.D.N.Y. (NEW YORK CITY)", - "name": "S.D.N.Y . 
(NEW YORK CITY)", + "identifier": "DOJ", + "name": "Department of Justice", }, pacer_case_id="9f5ae37f-c44e-4194-b075-3f8f028559c4", parties=[ From 3e03c0cc5309254452029228939924a8c149d633 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 24 Oct 2023 11:05:06 -0300 Subject: [PATCH 24/41] fix(search): migration 23->24 --- ...rch_models_update.py => 0024_search_models_acms_update.py} | 4 ++-- ...pdate_customers.sql => 0024_search_models_acms_update.sql} | 2 +- ...pdate.sql => 0024_search_models_acms_update_customers.sql} | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) rename cl/search/migrations/{0023_search_models_update.py => 0024_search_models_acms_update.py} (98%) rename cl/search/migrations/{0023_search_models_update_customers.sql => 0024_search_models_acms_update.sql} (99%) rename cl/search/migrations/{0023_search_models_update.sql => 0024_search_models_acms_update_customers.sql} (99%) diff --git a/cl/search/migrations/0023_search_models_update.py b/cl/search/migrations/0024_search_models_acms_update.py similarity index 98% rename from cl/search/migrations/0023_search_models_update.py rename to cl/search/migrations/0024_search_models_acms_update.py index f857df1d7f..7bdb4ac740 100644 --- a/cl/search/migrations/0023_search_models_update.py +++ b/cl/search/migrations/0024_search_models_acms_update.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.4 on 2023-10-03 21:21 +# Generated by Django 4.2.4 on 2023-10-24 13:54 import cl.lib.fields from django.db import migrations, models @@ -8,7 +8,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0022_alter_courthouse_court_noop"), + ("search", "0023_add_docket_sources_noop"), ] operations = [ diff --git a/cl/search/migrations/0023_search_models_update_customers.sql b/cl/search/migrations/0024_search_models_acms_update.sql similarity index 99% rename from cl/search/migrations/0023_search_models_update_customers.sql rename to cl/search/migrations/0024_search_models_acms_update.sql index 0a580272f5..c5832af015 100644 --- a/cl/search/migrations/0023_search_models_update_customers.sql +++ b/cl/search/migrations/0024_search_models_acms_update.sql @@ -19,6 +19,7 @@ CREATE INDEX "search_claimhistory_pacer_doc_id_ddcc4bdf_like" ON "search_claimhi -- -- (no-op) -- +-- -- Alter field pacer_doc_id on recapdocument -- ALTER TABLE "search_recapdocument" ALTER COLUMN "pacer_doc_id" TYPE varchar(64); @@ -28,5 +29,4 @@ CREATE INDEX "search_recapdocument_pacer_doc_id_e52314d9_like" ON "search_recapd -- Create index search_reca_acms_do_17c11f_idx on field(s) acms_document_guid of model recapdocument -- CREATE INDEX "search_reca_acms_do_17c11f_idx" ON "search_recapdocument" ("acms_document_guid"); - COMMIT; diff --git a/cl/search/migrations/0023_search_models_update.sql b/cl/search/migrations/0024_search_models_acms_update_customers.sql similarity index 99% rename from cl/search/migrations/0023_search_models_update.sql rename to cl/search/migrations/0024_search_models_acms_update_customers.sql index 97dd102ffa..ebfb0a7345 100644 --- a/cl/search/migrations/0023_search_models_update.sql +++ b/cl/search/migrations/0024_search_models_acms_update_customers.sql @@ -49,5 +49,4 @@ ALTER TABLE "search_recapdocumentevent" ALTER COLUMN "pacer_doc_id" TYPE varchar -- Create index search_reca_acms_do_17c11f_idx on field(s) acms_document_guid of model recapdocument -- CREATE INDEX "search_reca_acms_do_17c11f_idx" ON "search_recapdocument" ("acms_document_guid"); - COMMIT; From 132c2d673a2bb6bd476113d227632b190614610f Mon Sep 17 00:00:00 2001 
From: Eduardo Rosendo Date: Wed, 8 Nov 2023 20:20:08 -0400 Subject: [PATCH 25/41] fix(recap): Fixed acms_document_guid help text --- cl/recap/migrations/0013_processingqueue_update.py | 2 +- cl/recap/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/recap/migrations/0013_processingqueue_update.py b/cl/recap/migrations/0013_processingqueue_update.py index 044cd226df..366a7c2b75 100644 --- a/cl/recap/migrations/0013_processingqueue_update.py +++ b/cl/recap/migrations/0013_processingqueue_update.py @@ -17,7 +17,7 @@ class Migration(migrations.Migration): name="acms_document_guid", field=models.CharField( blank=True, - help_text="The ID of the document in PACER.", + help_text="The GUID of the document in ACMS.", max_length=64, ), ), diff --git a/cl/recap/models.py b/cl/recap/models.py index 840048c6f9..53811d46a2 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -131,7 +131,7 @@ class ProcessingQueue(AbstractDateTimeModel): db_index=True, ) acms_document_guid = models.CharField( - help_text="The ID of the document in PACER.", + help_text="The GUID of the document in ACMS.", max_length=64, blank=True, ) From 71cd33f5f68f35ecb592363cc12c1363230183ea Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 8 Nov 2023 20:22:28 -0400 Subject: [PATCH 26/41] feat(search): Add a helper method to compute ACMS URLs --- cl/search/models.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cl/search/models.py b/cl/search/models.py index b9909786a5..29ec66e2ab 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -945,6 +945,12 @@ def pacer_appellate_url_with_caseNum(self, path): f"incDktEntries=Y" ) + def pacer_acms_url(self): + return ( + f"https://{self.pacer_court_id}-showdoc.azurewebsites.us/" + f"{self.docket_number}" + ) + @property def pacer_docket_url(self): if self.court.jurisdiction == Court.FEDERAL_APPELLATE: @@ -955,6 +961,8 @@ def pacer_docket_url(self): if not self.pacer_case_id: return self.pacer_appellate_url_with_caseNum(path) + elif self.pacer_case_id.count("-") > 1: + return self.pacer_acms_url() else: return self.pacer_appellate_url_with_caseId(path) else: From 8fd8245df95dadf30d351b8da5d32ff912eb0cf0 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 15 Nov 2023 19:27:52 -0400 Subject: [PATCH 27/41] feat(search): Add 'Buy on Pacer' link for ACMS documents. --- cl/search/models.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 29ec66e2ab..520fca7f93 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -1519,15 +1519,21 @@ def pacer_url(self) -> str | None: court = self.docket_entry.docket.court court_id = map_cl_to_pacer_id(court.pk) if self.pacer_doc_id: - if court.jurisdiction == Court.FEDERAL_APPELLATE: - template = "https://ecf.%s.uscourts.gov/docs1/%s?caseId=%s" + if self.pacer_doc_id.count("-") > 1: + # Redirects users to the ACMS Docket Report page. + # Loading the docket report is an essential step to + # access the download confirmation page. 
+ return self.docket_entry.docket.pacer_docket_url else: - template = "https://ecf.%s.uscourts.gov/doc1/%s?caseid=%s" - return template % ( - court_id, - self.pacer_doc_id, - self.docket_entry.docket.pacer_case_id, - ) + if court.jurisdiction == Court.FEDERAL_APPELLATE: + template = "https://ecf.%s.uscourts.gov/docs1/%s?caseId=%s" + else: + template = "https://ecf.%s.uscourts.gov/doc1/%s?caseid=%s" + return template % ( + court_id, + self.pacer_doc_id, + self.docket_entry.docket.pacer_case_id, + ) else: if court.jurisdiction == Court.FEDERAL_APPELLATE: return "" From 2956ad29f76b45a5e91276cbdf3e0c956b252299 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 30 Nov 2023 09:36:12 -0400 Subject: [PATCH 28/41] refactor(search): Tweak the property to get URLs for RECAP documents. Removes unnecessary level of hierarchy to compute URLs and adds a comment about the issue with the ACMS download page. --- cl/search/models.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 520fca7f93..0789a63916 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -1520,20 +1520,20 @@ def pacer_url(self) -> str | None: court_id = map_cl_to_pacer_id(court.pk) if self.pacer_doc_id: if self.pacer_doc_id.count("-") > 1: - # Redirects users to the ACMS Docket Report page. - # Loading the docket report is an essential step to - # access the download confirmation page. + # It seems like loading the ACMS Download Page using links is not + # possible. we've implemented a modal window that explains this + # issue and guides users towards using the button to access the + # docket report. return self.docket_entry.docket.pacer_docket_url + elif court.jurisdiction == Court.FEDERAL_APPELLATE: + template = "https://ecf.%s.uscourts.gov/docs1/%s?caseId=%s" else: - if court.jurisdiction == Court.FEDERAL_APPELLATE: - template = "https://ecf.%s.uscourts.gov/docs1/%s?caseId=%s" - else: - template = "https://ecf.%s.uscourts.gov/doc1/%s?caseid=%s" - return template % ( - court_id, - self.pacer_doc_id, - self.docket_entry.docket.pacer_case_id, - ) + template = "https://ecf.%s.uscourts.gov/doc1/%s?caseid=%s" + return template % ( + court_id, + self.pacer_doc_id, + self.docket_entry.docket.pacer_case_id, + ) else: if court.jurisdiction == Court.FEDERAL_APPELLATE: return "" From 7553f144e3f70d367e5449496e9faa540adf3f66 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 30 Nov 2023 09:55:50 -0400 Subject: [PATCH 29/41] feat(recap): Adds a template for rendering modal window for ACMS docs --- cl/opinion_page/static/js/buy_pacer_modal.js | 21 ++++++++++++++- .../templates/includes/buy_acms_modal.html | 26 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 cl/opinion_page/templates/includes/buy_acms_modal.html diff --git a/cl/opinion_page/static/js/buy_pacer_modal.js b/cl/opinion_page/static/js/buy_pacer_modal.js index 44dd9b9b08..0bf8ecda1e 100644 --- a/cl/opinion_page/static/js/buy_pacer_modal.js +++ b/cl/opinion_page/static/js/buy_pacer_modal.js @@ -12,6 +12,20 @@ $(document).ready(function () { } }); + + $('.open_buy_acms_modal').on('click', function (e) { + //Modal clicked + //check if ctrl or shift key pressed + if (e.metaKey || e.shiftKey) { + //prevent modal from opening, go directly to href link + e.stopPropagation(); + }else { + //otherwise open modal and concatenate pacer URL to button + let pacer_url = $(this).attr('href'); + $('#acms_url').attr('href', pacer_url); + } + }); + 
////////////////////////// // Modal Cookie Handling// ////////////////////////// @@ -23,6 +37,11 @@ $(document).ready(function () { document.cookie = 'buy_on_pacer_modal=true' + expires + '; samesite=lax; path=/'; ///Close Modal - $('#modal-buy-pacer ').modal('toggle'); + $('#modal-buy-pacer').modal('toggle'); + }); + + $('#acms_url').on('click', function (e) { + ///Close Modal + $('#modal-buy-acms').modal('toggle'); }); }); diff --git a/cl/opinion_page/templates/includes/buy_acms_modal.html b/cl/opinion_page/templates/includes/buy_acms_modal.html new file mode 100644 index 0000000000..237623db62 --- /dev/null +++ b/cl/opinion_page/templates/includes/buy_acms_modal.html @@ -0,0 +1,26 @@ +{% load humanize %} + From 2df7ee3e3a999fdda147f06167015ae4601db5e8 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 30 Nov 2023 10:14:26 -0400 Subject: [PATCH 30/41] feat(recap): Tweaks templates to include new ACMS modal window --- cl/favorites/templates/tag.html | 1 + cl/opinion_page/templates/docket_tabs.html | 1 + .../templates/includes/de_list.html | 35 +++++++++++++++---- .../includes/rd_download_button.html | 28 ++++++++++----- cl/opinion_page/templates/recap_document.html | 1 + 5 files changed, 52 insertions(+), 14 deletions(-) diff --git a/cl/favorites/templates/tag.html b/cl/favorites/templates/tag.html index 4c554848eb..322b2ea998 100644 --- a/cl/favorites/templates/tag.html +++ b/cl/favorites/templates/tag.html @@ -49,6 +49,7 @@
    {% endif %} {% include "includes/buy_pacer_modal.html" %} + {% include "includes/buy_acms_modal.html" %} {% include "includes/docket_li.html" %} {% if forloop.last %}
diff --git a/cl/opinion_page/templates/docket_tabs.html b/cl/opinion_page/templates/docket_tabs.html index be730cf53f..49851f6524 100644 --- a/cl/opinion_page/templates/docket_tabs.html +++ b/cl/opinion_page/templates/docket_tabs.html @@ -39,6 +39,7 @@ src="{% static "js/buy_pacer_modal.js" %}"> {% include "includes/buy_pacer_modal.html" %} + {% include "includes/buy_acms_modal.html" %} {% include "includes/date_picker.html" %}