From 14756dc59d1a87ff775fbd1809f9f7be91b8e4c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:04:20 +0000 Subject: [PATCH 001/372] build(deps): bump redis from 3.5.3 to 4.5.5 Bumps [redis](https://github.com/redis/redis-py) from 3.5.3 to 4.5.5. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/3.5.3...v4.5.5) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- poetry.lock | 273 ++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 2 +- 2 files changed, 262 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index d923189ec5..e97391ad90 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "amqp" version = "5.1.1" description = "Low-level AMQP client for Python (fork of amqplib)." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -18,6 +19,7 @@ vine = ">=5.0.0" name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -38,6 +40,7 @@ trio = ["trio (>=0.16,<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "main" optional = false python-versions = "*" files = [ @@ -49,6 +52,7 @@ files = [ name = "argparse" version = "1.4.0" description = "Python command-line parsing library" +category = "main" optional = false python-versions = "*" files = [ @@ -60,6 +64,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -74,6 +79,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -85,6 +91,7 @@ files = [ name = "astroid" version = "2.15.4" description = "An abstract syntax tree for Python with inference support." 
+category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -104,6 +111,7 @@ wrapt = [ name = "asttokens" version = "2.0.8" description = "Annotate AST trees with source code positions" +category = "main" optional = false python-versions = "*" files = [ @@ -121,6 +129,7 @@ test = ["astroid (<=2.5.3)", "pytest"] name = "async-generator" version = "1.10" description = "Async generators and context managers for Python 3.5+" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -128,10 +137,23 @@ files = [ {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"}, ] +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] + [[package]] name = "attrs" version = "20.3.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -149,6 +171,7 @@ tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> name = "autobahn" version = "23.1.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -177,6 +200,7 @@ xbr = ["base58 (>=2.1.0)", "cbor2 (>=5.2.0)", "click (>=8.1.2)", "ecdsa (>=0.16. name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." +category = "main" optional = false python-versions = "*" files = [ @@ -195,6 +219,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "main" optional = false python-versions = "*" files = [ @@ -206,6 +231,7 @@ files = [ name = "beautifulsoup4" version = "4.11.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -224,6 +250,7 @@ lxml = ["lxml"] name = "billiard" version = "4.1.0" description = "Python multiprocessing fork with improvements and bugfixes" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -235,6 +262,7 @@ files = [ name = "black" version = "23.3.0" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -283,6 +311,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.17.43" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -299,6 +328,7 @@ s3transfer = ">=0.3.0,<0.4.0" name = "botocore" version = "1.20.43" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -318,6 +348,7 @@ crt = ["awscrt (==0.10.8)"] name = "celery" version = "5.3.0" description = "Distributed Task Queue." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -373,6 +404,7 @@ zstd = ["zstandard (==0.21.0)"] name = "certifi" version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -384,6 +416,7 @@ files = [ name = "cffi" version = "1.14.5" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -445,6 +478,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -456,6 +490,7 @@ files = [ name = "chardet" version = "5.1.0" description = "Universal encoding detector for Python 3" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -467,6 +502,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -551,6 +587,7 @@ files = [ name = "click" version = "8.1.2" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +602,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.0" description = "Enables git-like *did-you-mean* feature in click" +category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -579,6 +617,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +category = "main" optional = false python-versions = "*" files = [ @@ -596,6 +635,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.2.0" description = "REPL plugin for Click" +category = "main" optional = false python-versions = "*" files = [ @@ -612,6 +652,7 @@ six = "*" name = "climage" version = "0.1.3" description = "Convert images to beautiful ANSI escape codes" +category = "main" optional = false python-versions = ">=3.2" files = [ @@ -627,6 +668,7 @@ Pillow = "*" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -638,6 +680,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -649,6 +692,7 @@ files = [ name = "contextlib2" version = "0.6.0.post1" description = "Backports and enhancements for the contextlib module" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -660,6 +704,7 @@ files = [ name = "coreapi" version = "2.3.3" description = "Python client library for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -677,6 +722,7 @@ uritemplate = "*" name = "coreschema" version = "0.0.4" description = "Core Schema." 
+category = "main" optional = false python-versions = "*" files = [ @@ -691,6 +737,7 @@ jinja2 = "*" name = "courts-db" version = "0.10.9" description = "Database of Courts" +category = "main" optional = false python-versions = "*" files = [ @@ -702,6 +749,7 @@ files = [ name = "cryptography" version = "36.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -742,6 +790,7 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "cssselect" version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -753,6 +802,7 @@ files = [ name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +822,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "datasketch" version = "1.5.7" description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" optional = false python-versions = "*" files = [ @@ -794,6 +845,7 @@ test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy" name = "dateparser" version = "1.1.8" description = "Date parsing library designed to parse dates from HTML pages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -816,6 +868,7 @@ langdetect = ["langdetect"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -827,6 +880,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -838,6 +892,7 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -852,6 +907,7 @@ graph = ["objgraph (>=1.7.2)"] name = "disposable-email-domains" version = "0.0.64" description = "A set of disposable email domains" +category = "main" optional = false python-versions = "*" files = [ @@ -866,6 +922,7 @@ dev = ["check-manifest"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -877,6 +934,7 @@ files = [ name = "django" version = "4.2.1" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -897,6 +955,7 @@ bcrypt = ["bcrypt"] name = "django-admin-cursor-paginator" version = "0.1.2" description = "Drop-in replacement for django admin default pagination that works fast with huge tables." +category = "main" optional = false python-versions = ">=3.4" files = [ @@ -911,6 +970,7 @@ Django = ">=2.0" name = "django-cache-memoize" version = "0.1.8" description = "Django utility for a memoization decorator that uses the Django cache framework." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -925,6 +985,7 @@ dev = ["black", "flake8", "therapist", "tox", "twine"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -939,6 +1000,7 @@ Django = ">=3.2" name = "django-debug-toolbar" version = "4.0.0" description = "A configurable set of panels that display various debug information about the current request/response." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -954,6 +1016,7 @@ sqlparse = ">=0.2" name = "django-elasticsearch-dsl" version = "7.3" description = "Wrapper around elasticsearch-dsl-py for django models" +category = "main" optional = false python-versions = "*" files = [ @@ -969,6 +1032,7 @@ six = "*" name = "django-environ" version = "0.8.1" description = "A package that allows you to utilize 12factor inspired environment variables to configure your Django application." +category = "main" optional = false python-versions = ">=3.4,<4" files = [ @@ -977,14 +1041,15 @@ files = [ ] [package.extras] -develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.dev0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] -docs = ["furo (>=2021.8.17b43,<2021.9.dev0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +docs = ["furo (>=2021.8.17b43,<2021.9.0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] testing = ["coverage[toml] (>=5.0a4)", "pytest (>=4.6.11)"] [[package]] name = "django-extensions" version = "3.2.1" description = "Extensions for Django" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,6 +1064,7 @@ Django = ">=3.2" name = "django-filter" version = "2.4.0" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1013,6 +1079,7 @@ Django = ">=2.2" name = "django-hcaptcha" version = "0.2.0" description = "Django hCaptcha provides a simple way to protect your django forms using hCaptcha" +category = "main" optional = false python-versions = "*" files = [ @@ -1024,6 +1091,7 @@ files = [ name = "django-localflavor" version = "3.1" description = "Country-specific Django helpers" +category = "main" optional = false python-versions = "*" files = [ @@ -1039,6 +1107,7 @@ python-stdnum = ">=1.6" name = "django-markdown-deux" version = "1.0.6" description = "a Django app that provides template tags for using Markdown (using the python-markdown2 processor)" +category = "main" optional = false python-versions = "*" files = [ @@ -1052,6 +1121,7 @@ markdown2 = "*" name = "django-mathfilters" version = "1.0.0" description = "A set of simple math filters for Django" +category = "main" optional = false python-versions = "*" files = [ @@ -1063,6 +1133,7 @@ files = [ name = "django-override-storage" version = "0.3.2" description = "Django test helpers to manage file storage side effects." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1074,6 +1145,7 @@ files = [ name = "django-pghistory" version = "2.7.0" description = "History tracking for Django and Postgres" +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1089,6 +1161,7 @@ django-pgtrigger = ">=4.5.0" name = "django-pgtrigger" version = "4.6.0" description = "Postgres trigger support integrated with Django models." +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1103,6 +1176,7 @@ django = ">=2" name = "django-ratelimit" version = "4.0.0" description = "Cache-based rate-limiting for Django." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1114,6 +1188,7 @@ files = [ name = "django-ses" version = "3.3.0" description = "A Django email backend for Amazon's Simple Email Service" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1136,6 +1211,7 @@ events = ["cryptography (>=36.0.2)", "requests (>=2.27.1)"] name = "django-storages" version = "1.13.2" description = "Support for many storage backends in Django" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1158,6 +1234,7 @@ sftp = ["paramiko (>=1.10.0)"] name = "django-stubs" version = "4.2.0" description = "Mypy stubs for Django" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1181,6 +1258,7 @@ compatible-mypy = ["mypy (>=1.2.0,<1.3)"] name = "django-stubs-ext" version = "4.2.0" description = "Monkey-patching and extensions for django-stubs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1196,6 +1274,7 @@ typing-extensions = "*" name = "django-waffle" version = "3.0.0" description = "A feature flipper for Django." +category = "main" optional = false python-versions = "*" files = [ @@ -1207,6 +1286,7 @@ files = [ name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1222,6 +1302,7 @@ pytz = "*" name = "djangorestframework-filters" version = "1.0.0.dev2" description = "Better filtering for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1237,6 +1318,7 @@ djangorestframework = "*" name = "djangorestframework-stubs" version = "3.14.0" description = "PEP-484 stubs for django-rest-framework" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1261,6 +1343,7 @@ markdown = ["types-Markdown (>=0.1.5)"] name = "djangorestframework-xml" version = "2.0.0" description = "XML support for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1280,6 +1363,7 @@ tests = ["Django (>=1.6)", "djangorestframework (>=2.4.3)", "flake8", "pytest", name = "docopt" version = "0.6.2" description = "Pythonic argument parser, that will make you smile" +category = "main" optional = false python-versions = "*" files = [ @@ -1290,6 +1374,7 @@ files = [ name = "drf-dynamic-fields" version = "0.3.1" description = "Dynamically return subset of Django REST Framework serializer fields" +category = "main" optional = false python-versions = "*" files = [ @@ -1301,6 +1386,7 @@ files = [ name = "elasticsearch" version = "7.17.9" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1322,6 +1408,7 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "elasticsearch-dsl" version = "7.4.0" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1341,6 +1428,7 @@ develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytes name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1355,6 +1443,7 @@ test = ["pytest (>=6)"] name = "executing" version = "1.1.0" description = "Get the currently executing AST node of a frame, and other information" +category = "main" optional = false python-versions = "*" files = [ @@ -1369,6 +1458,7 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "exrex" version = "0.11.0" description = "Irregular methods for regular expressions" +category = "dev" optional = false python-versions = "*" files = [ @@ -1380,6 +1470,7 @@ files = [ name = "eyecite" version = "2.4.0" description = "Tool for extracting legal citations from text strings." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1399,6 +1490,7 @@ reporters-db = ">=3.2.2,<4.0.0" name = "factory-boy" version = "3.2.1" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1417,6 +1509,7 @@ doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] name = "faker" version = "13.3.1" description = "Faker is a Python package that generates fake data for you." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1431,6 +1524,7 @@ python-dateutil = ">=2.4" name = "fast-diff-match-patch" version = "2.0.1" description = "fast_diff_match_patch: Python package wrapping the C++ implementation of google-diff-match-patch" +category = "main" optional = false python-versions = "*" files = [ @@ -1506,6 +1600,7 @@ files = [ name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1520,6 +1615,7 @@ sgmllib3k = "*" name = "filelock" version = "3.9.0" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1631,7 @@ testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pyt name = "flake8" version = "6.0.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -1551,6 +1648,7 @@ pyflakes = ">=3.0.0,<3.1.0" name = "flynt" version = "0.78" description = "CLI tool to convert a python project's %-formatted strings to f-strings." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1569,6 +1667,7 @@ dev = ["build", "pre-commit", "pytest", "pytest-cov", "twine"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1579,6 +1678,7 @@ files = [ name = "fuzzywuzzy" version = "0.18.0" description = "Fuzzy string matching in python" +category = "main" optional = false python-versions = "*" files = [ @@ -1593,6 +1693,7 @@ speedup = ["python-levenshtein (>=0.12)"] name = "geonamescache" version = "1.6.0" description = "Geonames data for continents, cities and US states." +category = "main" optional = false python-versions = "*" files = [ @@ -1604,6 +1705,7 @@ files = [ name = "gunicorn" version = "20.1.0" description = "WSGI HTTP Server for UNIX" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1624,6 +1726,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.13.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1635,6 +1738,7 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1656,6 +1760,7 @@ lxml = ["lxml"] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1670,6 +1775,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httptools" version = "0.5.0" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -1723,6 +1829,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." 
+category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1737,6 +1844,7 @@ idna = ">=2.5" name = "identify" version = "2.5.17" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1751,6 +1859,7 @@ license = ["ukkonen"] name = "idna" version = "2.10" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1762,6 +1871,7 @@ files = [ name = "igraph" version = "0.10.4" description = "High performance graph data structures and algorithms" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1828,6 +1938,7 @@ test-musl = ["networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"] name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1843,6 +1954,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1854,6 +1966,7 @@ files = [ name = "internetarchive" version = "3.3.0" description = "A Python interface to archive.org." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1879,6 +1992,7 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7. name = "ipaddress" version = "1.0.23" description = "IPv4/IPv6 manipulation library" +category = "main" optional = false python-versions = "*" files = [ @@ -1890,6 +2004,7 @@ files = [ name = "ipython" version = "8.10.0" description = "IPython: Productive Interactive Computing" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1928,6 +2043,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isort" version = "5.8.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1944,6 +2060,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itypes" version = "1.2.0" description = "Simple immutable types for python." +category = "main" optional = false python-versions = "*" files = [ @@ -1955,6 +2072,7 @@ files = [ name = "jedi" version = "0.18.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1973,6 +2091,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "2.11.3" description = "A very fast and expressive template engine." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1990,6 +2109,7 @@ i18n = ["Babel (>=0.8)"] name = "jmespath" version = "0.10.0" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2001,6 +2121,7 @@ files = [ name = "jsonpatch" version = "1.32" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2015,6 +2136,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.1" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2026,6 +2148,7 @@ files = [ name = "judge-pics" version = "2.0.2" description = "Database of Judge Pictures" +category = "main" optional = false python-versions = "*" files = [ @@ -2045,6 +2168,7 @@ requests = ">=2.0,<3.0" name = "juriscraper" version = "2.5.49" description = "An API to scrape American court websites for metadata." +category = "main" optional = false python-versions = "*" files = [ @@ -2072,6 +2196,7 @@ tldextract = "*" name = "kdtree" version = "0.16" description = "A Python implemntation of a kd-tree" +category = "main" optional = false python-versions = "*" files = [ @@ -2083,6 +2208,7 @@ files = [ name = "kombu" version = "5.3.0" description = "Messaging library for Python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2241,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "lazy-object-proxy" version = "1.6.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2146,6 +2273,7 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2231,6 +2359,7 @@ source = ["Cython (>=0.29.7)"] name = "lxml-stubs" version = "0.4.0" description = "Type annotations for the lxml package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2245,6 +2374,7 @@ test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1. name = "markdown2" version = "2.4.0" description = "A fast and complete Python implementation of Markdown" +category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -2256,6 +2386,7 @@ files = [ name = "markupsafe" version = "1.1.1" description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" files = [ @@ -2317,6 +2448,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2331,6 +2463,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2342,6 +2475,7 @@ files = [ name = "mypy" version = "1.2.0" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2388,6 +2522,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2399,6 +2534,7 @@ files = [ name = "nameparser" version = "1.1.1" description = "A simple Python module for parsing human names into their individual components." +category = "main" optional = false python-versions = "*" files = [ @@ -2410,6 +2546,7 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2425,8 +2562,9 @@ icu = ["PyICU (>=1.0.0)"] name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" +category = "main" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -2441,6 +2579,7 @@ PyOpenSSL = "*" name = "networkx" version = "3.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2459,6 +2598,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2473,6 +2613,7 @@ setuptools = "*" name = "nose" version = "1.3.7" description = "nose extends unittest to make testing easier" +category = "main" optional = false python-versions = "*" files = [ @@ -2485,6 +2626,7 @@ files = [ name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2522,6 +2664,7 @@ files = [ name = "openapi-codec" version = "1.3.2" description = "An OpenAPI codec for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -2535,6 +2678,7 @@ coreapi = ">=2.2.0" name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2549,6 +2693,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2560,6 +2705,7 @@ files = [ name = "pandas" version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2604,6 +2750,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2619,6 +2766,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2630,6 +2778,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." +category = "main" optional = false python-versions = "*" files = [ @@ -2644,6 +2793,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "main" optional = false python-versions = "*" files = [ @@ -2655,6 +2805,7 @@ files = [ name = "pillow" version = "9.3.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2729,6 +2880,7 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "2.5.1" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2744,6 +2896,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "0.13.1" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2758,6 +2911,7 @@ dev = ["pre-commit", "tox"] name = "pre-commit" version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2776,6 +2930,7 @@ virtualenv = ">=20.10.0" name = "probableparsing" version = "0.0.1" description = "Common methods for propbable parsers" +category = "main" optional = false python-versions = "*" files = [ @@ -2787,6 +2942,7 @@ files = [ name = "prompt-toolkit" version = "3.0.31" description = "Library for building powerful interactive command lines in Python" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -2801,6 +2957,7 @@ wcwidth = "*" name = "psycopg2" version = "2.9.5" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2823,6 +2980,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -2834,6 +2992,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "main" optional = false python-versions = "*" files = [ @@ -2848,6 +3007,7 @@ tests = ["pytest"] name = "pyahocorasick" version = "1.4.2" description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." +category = "main" optional = false python-versions = "*" files = [ @@ -2858,6 +3018,7 @@ files = [ name = "pyasn1" version = "0.4.8" description = "ASN.1 types and codecs" +category = "main" optional = false python-versions = "*" files = [ @@ -2869,6 +3030,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2883,6 +3045,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.10.0" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2894,6 +3057,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2905,6 +3069,7 @@ files = [ name = "pyflakes" version = "3.0.1" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2916,6 +3081,7 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2930,6 +3096,7 @@ plugins = ["importlib-metadata"] name = "pylint" version = "2.17.3" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -2958,6 +3125,7 @@ testutils = ["gitpython (>3)"] name = "pyopenssl" version = "20.0.1" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -2977,6 +3145,7 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "2.4.7" description = "Python parsing module" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2988,6 +3157,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3000,6 +3170,7 @@ files = [ name = "pystemmer" version = "2.0.1" description = "Snowball stemming algorithms, for information retrieval" +category = "main" optional = false python-versions = "*" files = [ @@ -3010,6 +3181,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3032,6 +3204,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3050,6 +3223,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "python-crfsuite" version = "0.9.9" description = "Python binding for CRFsuite" +category = "main" optional = false python-versions = "*" files = [ @@ -3100,6 +3274,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3114,6 +3289,7 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3128,6 +3304,7 @@ cli = ["click (>=5.0)"] name = "python-levenshtein" version = "0.12.2" description = "Python extension for computing string edit distances and similarities." 
+category = "main" optional = false python-versions = "*" files = [ @@ -3141,6 +3318,7 @@ setuptools = "*" name = "python-magic" version = "0.4.22" description = "File type identification using libmagic" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3152,6 +3330,7 @@ files = [ name = "python-stdnum" version = "1.16" description = "Python module to handle standardized numbers and codes" +category = "main" optional = false python-versions = "*" files = [ @@ -3168,6 +3347,7 @@ soap-fallback = ["PySimpleSOAP"] name = "pytz" version = "2021.1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -3179,6 +3359,7 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3193,6 +3374,7 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "5.4.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3229,22 +3411,28 @@ files = [ [[package]] name = "redis" -version = "3.5.3" -description = "Python client for Redis key-value store" +version = "4.5.5" +description = "Python client for Redis database and key-value store" +category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.7" files = [ - {file = "redis-3.5.3-py2.py3-none-any.whl", hash = "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"}, - {file = "redis-3.5.3.tar.gz", hash = "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2"}, + {file = "redis-4.5.5-py3-none-any.whl", hash = "sha256:77929bc7f5dab9adf3acba2d3bb7d7658f1e0c2f1cafe7eb36434e751c471119"}, + {file = "redis-4.5.5.tar.gz", hash = "sha256:dc87a0bdef6c8bfe1ef1e1c40be7034390c2ae02d92dcd0c7ca1729443899880"}, ] +[package.dependencies] +async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} + [package.extras] -hiredis = ["hiredis (>=0.1.3)"] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] [[package]] name = "regex" version = "2022.1.18" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = "*" files = [ @@ -3328,6 +3516,7 @@ files = [ name = "reporters-db" version = "3.2.36" description = "Database of Court Reporters" +category = "main" optional = false python-versions = "*" files = [ @@ -3342,6 +3531,7 @@ six = ">=1.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3363,6 +3553,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-file" version = "1.5.1" description = "File transport adapter for Requests" +category = "main" optional = false python-versions = "*" files = [ @@ -3378,6 +3569,7 @@ six = "*" name = "s3transfer" version = "0.3.6" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = "*" files = [ @@ -3392,6 +3584,7 @@ botocore = ">=1.12.36,<2.0a.0" name = "schema" version = "0.7.4" description = "Simple data validation library" +category = "main" optional = false python-versions = "*" files = [ @@ -3406,6 +3599,7 @@ contextlib2 = ">=0.5.5" name = "scipy" version = "1.10.1" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = "<3.12,>=3.8" files = [ @@ -3444,6 +3638,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "scorched" version = "0.13.1.dev0" description = "" +category = "main" optional = false python-versions = "*" files = [] @@ -3466,6 +3661,7 @@ resolved_reference = "0632024e72e22a71e17cdb778805561f7cdd33d8" name = "selenium" version = "4.9.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3483,6 +3679,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "sentry-sdk" version = "1.14.0" description = "Python client for Sentry (https://sentry.io)" +category = "main" optional = false python-versions = "*" files = [ @@ -3521,6 +3718,7 @@ tornado = ["tornado (>=5)"] name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -3545,6 +3743,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "65.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3561,6 +3760,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" files = [ @@ -3571,6 +3771,7 @@ files = [ name = "simplejson" version = "3.18.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3653,6 +3854,7 @@ files = [ name = "six" version = "1.15.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3664,6 +3866,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3675,6 +3878,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -3686,6 +3890,7 @@ files = [ name = "soupsieve" version = "2.2.1" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3697,6 +3902,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3713,6 +3919,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.5.1" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "main" optional = false python-versions = "*" files = [ @@ -3732,6 +3939,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "stripe" version = "5.2.0" description = "Python bindings for the Stripe API" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3746,6 +3954,7 @@ requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} name = "tblib" version = "1.7.0" description = "Traceback serialization library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3757,6 +3966,7 @@ files = [ name = "texttable" version = "1.6.4" description = "module for creating simple ASCII tables" +category = "main" optional = false python-versions = "*" files = [ @@ -3768,6 +3978,7 @@ files = [ name = "time-machine" version = "2.9.0" description = "Travel through time in your tests." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3833,6 +4044,7 @@ python-dateutil = "*" name = "timeout-decorator" version = "0.5.0" description = "Timeout decorator" +category = "main" optional = false python-versions = "*" files = [ @@ -3843,6 +4055,7 @@ files = [ name = "tldextract" version = "3.4.0" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3860,6 +4073,7 @@ requests-file = ">=1.4" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4085,7 @@ files = [ name = "tomlkit" version = "0.11.8" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3882,6 +4097,7 @@ files = [ name = "tqdm" version = "4.59.0" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3898,6 +4114,7 @@ telegram = ["requests"] name = "traitlets" version = "5.4.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3912,6 +4129,7 @@ test = ["pre-commit", "pytest"] name = "trio" version = "0.21.0" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3932,6 +4150,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.9.2" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3948,6 +4167,7 @@ wsproto = ">=0.14" name = "twisted" version = "22.4.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.6.7" files = [ @@ -3988,6 +4208,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." +category = "main" optional = false python-versions = "*" files = [ @@ -4013,6 +4234,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4029,6 +4251,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "types-dateparser" version = "1.1.4.6" description = "Typing stubs for dateparser" +category = "main" optional = false python-versions = "*" files = [ @@ -4040,6 +4263,7 @@ files = [ name = "types-pyopenssl" version = "23.0.0.4" description = "Typing stubs for pyOpenSSL" +category = "dev" optional = false python-versions = "*" files = [ @@ -4054,6 +4278,7 @@ cryptography = ">=35.0.0" name = "types-python-dateutil" version = "2.8.19.12" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4065,6 +4290,7 @@ files = [ name = "types-pytz" version = "2021.3.5" description = "Typing stubs for pytz" +category = "dev" optional = false python-versions = "*" files = [ @@ -4076,6 +4302,7 @@ files = [ name = "types-pyyaml" version = "6.0.4" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -4087,6 +4314,7 @@ files = [ name = "types-redis" version = "4.5.4.1" description = "Typing stubs for redis" +category = "dev" optional = false python-versions = "*" files = [ @@ -4102,6 +4330,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.29.0.0" description = "Typing stubs for requests" +category = "dev" optional = false python-versions = "*" files = [ @@ -4116,6 +4345,7 @@ types-urllib3 = "<1.27" name = "types-simplejson" version = "3.19.0.0" description = "Typing stubs for simplejson" +category = "dev" optional = false 
python-versions = "*" files = [ @@ -4127,6 +4357,7 @@ files = [ name = "types-urllib3" version = "1.26.11" description = "Typing stubs for urllib3" +category = "dev" optional = false python-versions = "*" files = [ @@ -4138,6 +4369,7 @@ files = [ name = "typing-extensions" version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4149,6 +4381,7 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -4160,6 +4393,7 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4179,6 +4413,7 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "unidecode" version = "1.2.0" description = "ASCII transliterations of Unicode text" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4190,6 +4425,7 @@ files = [ name = "uritemplate" version = "3.0.1" description = "URI templates" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4201,6 +4437,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4220,6 +4457,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "usaddress" version = "0.5.10" description = "Parse US addresses using conditional random fields" +category = "main" optional = false python-versions = "*" files = [ @@ -4236,6 +4474,7 @@ python-crfsuite = ">=0.7" name = "uvicorn" version = "0.22.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4250,7 +4489,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -4261,6 +4500,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.17.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4305,6 +4545,7 @@ test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "my name = "vine" version = "5.0.0" description = "Promises, promises, promises." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4316,6 +4557,7 @@ files = [ name = "virtualenv" version = "20.17.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4336,6 +4578,7 @@ testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7 name = "watchfiles" version = "0.19.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4370,6 +4613,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.5" description = "Measures the displayed width of unicode strings in a terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -4381,6 +4625,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "main" optional = false python-versions = "*" files = [ @@ -4392,6 +4637,7 @@ files = [ name = "websockets" version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4471,6 +4717,7 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -4555,6 +4802,7 @@ files = [ name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4569,6 +4817,7 @@ h11 = ">=0.9.0,<1" name = "zope-interface" version = "6.0" description = "Interfaces for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4615,4 +4864,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "d435d4c7dce4af1c659d4dbe2d712e2091a1514a80439b4e4009404f69bef595" +content-hash = "31793909108232d01e0af0d04665aed89ecf132877c5d9b7bfa6b6be06b18dd1" diff --git a/pyproject.toml b/pyproject.toml index 5ec87e6e65..462d55a9a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ python-dateutil = "^2.8.1" python-magic = "^0.4.21" pytz = "*" pyyaml = "^5.3.1" -redis = "^3.5.3" +redis = "^4.5.5" requests = "^2.31.0" simplejson = "^3.18.3" stripe = "^5.2.0" From 228d4317a8462f5914f9ceb69b76aa56269eba98 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:04:51 +0000 Subject: [PATCH 002/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e531956054..e9052f9eaf 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This repository is organized in the following way: - scripts: logrotate, systemd, etc, and init scripts for our various configurations and daemons. -## Getting Involved +## Getting Involved If you want to get involved send us an email with your contact info or take a look through the [issues list][issues]. 
There are innumerable things we need help with, but we especially are looking for help with: From 0f6cbf528513b4dbe6d3ebb8f2a0f95720718973 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:07:57 +0000 Subject: [PATCH 003/372] build(deps): bump drf-dynamic-fields from 0.3.1 to 0.4.0 Bumps [drf-dynamic-fields](https://github.com/dbrgn/drf-dynamic-fields) from 0.3.1 to 0.4.0. - [Changelog](https://github.com/dbrgn/drf-dynamic-fields/blob/master/CHANGELOG.md) - [Commits](https://github.com/dbrgn/drf-dynamic-fields/compare/v0.3.1...v0.4.0) --- updated-dependencies: - dependency-name: drf-dynamic-fields dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 249 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 241 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index d923189ec5..5cbfed0e6a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "amqp" version = "5.1.1" description = "Low-level AMQP client for Python (fork of amqplib)." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -18,6 +19,7 @@ vine = ">=5.0.0" name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -38,6 +40,7 @@ trio = ["trio (>=0.16,<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "main" optional = false python-versions = "*" files = [ @@ -49,6 +52,7 @@ files = [ name = "argparse" version = "1.4.0" description = "Python command-line parsing library" +category = "main" optional = false python-versions = "*" files = [ @@ -60,6 +64,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -74,6 +79,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -85,6 +91,7 @@ files = [ name = "astroid" version = "2.15.4" description = "An abstract syntax tree for Python with inference support." 
+category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -104,6 +111,7 @@ wrapt = [ name = "asttokens" version = "2.0.8" description = "Annotate AST trees with source code positions" +category = "main" optional = false python-versions = "*" files = [ @@ -121,6 +129,7 @@ test = ["astroid (<=2.5.3)", "pytest"] name = "async-generator" version = "1.10" description = "Async generators and context managers for Python 3.5+" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -132,6 +141,7 @@ files = [ name = "attrs" version = "20.3.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -149,6 +159,7 @@ tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> name = "autobahn" version = "23.1.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -177,6 +188,7 @@ xbr = ["base58 (>=2.1.0)", "cbor2 (>=5.2.0)", "click (>=8.1.2)", "ecdsa (>=0.16. name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." +category = "main" optional = false python-versions = "*" files = [ @@ -195,6 +207,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "main" optional = false python-versions = "*" files = [ @@ -206,6 +219,7 @@ files = [ name = "beautifulsoup4" version = "4.11.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -224,6 +238,7 @@ lxml = ["lxml"] name = "billiard" version = "4.1.0" description = "Python multiprocessing fork with improvements and bugfixes" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -235,6 +250,7 @@ files = [ name = "black" version = "23.3.0" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -283,6 +299,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.17.43" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -299,6 +316,7 @@ s3transfer = ">=0.3.0,<0.4.0" name = "botocore" version = "1.20.43" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -318,6 +336,7 @@ crt = ["awscrt (==0.10.8)"] name = "celery" version = "5.3.0" description = "Distributed Task Queue." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -373,6 +392,7 @@ zstd = ["zstandard (==0.21.0)"] name = "certifi" version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -384,6 +404,7 @@ files = [ name = "cffi" version = "1.14.5" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -445,6 +466,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." 
+category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -456,6 +478,7 @@ files = [ name = "chardet" version = "5.1.0" description = "Universal encoding detector for Python 3" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -467,6 +490,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -551,6 +575,7 @@ files = [ name = "click" version = "8.1.2" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +590,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.0" description = "Enables git-like *did-you-mean* feature in click" +category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -579,6 +605,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." +category = "main" optional = false python-versions = "*" files = [ @@ -596,6 +623,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.2.0" description = "REPL plugin for Click" +category = "main" optional = false python-versions = "*" files = [ @@ -612,6 +640,7 @@ six = "*" name = "climage" version = "0.1.3" description = "Convert images to beautiful ANSI escape codes" +category = "main" optional = false python-versions = ">=3.2" files = [ @@ -627,6 +656,7 @@ Pillow = "*" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -638,6 +668,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -649,6 +680,7 @@ files = [ name = "contextlib2" version = "0.6.0.post1" description = "Backports and enhancements for the contextlib module" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -660,6 +692,7 @@ files = [ name = "coreapi" version = "2.3.3" description = "Python client library for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -677,6 +710,7 @@ uritemplate = "*" name = "coreschema" version = "0.0.4" description = "Core Schema." +category = "main" optional = false python-versions = "*" files = [ @@ -691,6 +725,7 @@ jinja2 = "*" name = "courts-db" version = "0.10.9" description = "Database of Courts" +category = "main" optional = false python-versions = "*" files = [ @@ -702,6 +737,7 @@ files = [ name = "cryptography" version = "36.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -742,6 +778,7 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "cssselect" version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -753,6 +790,7 @@ files = [ name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +810,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "datasketch" version = "1.5.7" description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" optional = false python-versions = "*" files = [ @@ -794,6 +833,7 @@ test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy" name = "dateparser" version = "1.1.8" description = "Date parsing library designed to parse dates from HTML pages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -816,6 +856,7 @@ langdetect = ["langdetect"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -827,6 +868,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -838,6 +880,7 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -852,6 +895,7 @@ graph = ["objgraph (>=1.7.2)"] name = "disposable-email-domains" version = "0.0.64" description = "A set of disposable email domains" +category = "main" optional = false python-versions = "*" files = [ @@ -866,6 +910,7 @@ dev = ["check-manifest"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -877,6 +922,7 @@ files = [ name = "django" version = "4.2.1" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -897,6 +943,7 @@ bcrypt = ["bcrypt"] name = "django-admin-cursor-paginator" version = "0.1.2" description = "Drop-in replacement for django admin default pagination that works fast with huge tables." +category = "main" optional = false python-versions = ">=3.4" files = [ @@ -911,6 +958,7 @@ Django = ">=2.0" name = "django-cache-memoize" version = "0.1.8" description = "Django utility for a memoization decorator that uses the Django cache framework." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -925,6 +973,7 @@ dev = ["black", "flake8", "therapist", "tox", "twine"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -939,6 +988,7 @@ Django = ">=3.2" name = "django-debug-toolbar" version = "4.0.0" description = "A configurable set of panels that display various debug information about the current request/response." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -954,6 +1004,7 @@ sqlparse = ">=0.2" name = "django-elasticsearch-dsl" version = "7.3" description = "Wrapper around elasticsearch-dsl-py for django models" +category = "main" optional = false python-versions = "*" files = [ @@ -969,6 +1020,7 @@ six = "*" name = "django-environ" version = "0.8.1" description = "A package that allows you to utilize 12factor inspired environment variables to configure your Django application." +category = "main" optional = false python-versions = ">=3.4,<4" files = [ @@ -977,14 +1029,15 @@ files = [ ] [package.extras] -develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.dev0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] -docs = ["furo (>=2021.8.17b43,<2021.9.dev0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +docs = ["furo (>=2021.8.17b43,<2021.9.0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] testing = ["coverage[toml] (>=5.0a4)", "pytest (>=4.6.11)"] [[package]] name = "django-extensions" version = "3.2.1" description = "Extensions for Django" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,6 +1052,7 @@ Django = ">=3.2" name = "django-filter" version = "2.4.0" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1013,6 +1067,7 @@ Django = ">=2.2" name = "django-hcaptcha" version = "0.2.0" description = "Django hCaptcha provides a simple way to protect your django forms using hCaptcha" +category = "main" optional = false python-versions = "*" files = [ @@ -1024,6 +1079,7 @@ files = [ name = "django-localflavor" version = "3.1" description = "Country-specific Django helpers" +category = "main" optional = false python-versions = "*" files = [ @@ -1039,6 +1095,7 @@ python-stdnum = ">=1.6" name = "django-markdown-deux" version = "1.0.6" description = "a Django app that provides template tags for using Markdown (using the python-markdown2 processor)" +category = "main" optional = false python-versions = "*" files = [ @@ -1052,6 +1109,7 @@ markdown2 = "*" name = "django-mathfilters" version = "1.0.0" description = "A set of simple math filters for Django" +category = "main" optional = false python-versions = "*" files = [ @@ -1063,6 +1121,7 @@ files = [ name = "django-override-storage" version = "0.3.2" description = "Django test helpers to manage file storage side effects." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1074,6 +1133,7 @@ files = [ name = "django-pghistory" version = "2.7.0" description = "History tracking for Django and Postgres" +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1089,6 +1149,7 @@ django-pgtrigger = ">=4.5.0" name = "django-pgtrigger" version = "4.6.0" description = "Postgres trigger support integrated with Django models." +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1103,6 +1164,7 @@ django = ">=2" name = "django-ratelimit" version = "4.0.0" description = "Cache-based rate-limiting for Django." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1114,6 +1176,7 @@ files = [ name = "django-ses" version = "3.3.0" description = "A Django email backend for Amazon's Simple Email Service" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1136,6 +1199,7 @@ events = ["cryptography (>=36.0.2)", "requests (>=2.27.1)"] name = "django-storages" version = "1.13.2" description = "Support for many storage backends in Django" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1158,6 +1222,7 @@ sftp = ["paramiko (>=1.10.0)"] name = "django-stubs" version = "4.2.0" description = "Mypy stubs for Django" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1181,6 +1246,7 @@ compatible-mypy = ["mypy (>=1.2.0,<1.3)"] name = "django-stubs-ext" version = "4.2.0" description = "Monkey-patching and extensions for django-stubs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1196,6 +1262,7 @@ typing-extensions = "*" name = "django-waffle" version = "3.0.0" description = "A feature flipper for Django." +category = "main" optional = false python-versions = "*" files = [ @@ -1207,6 +1274,7 @@ files = [ name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1222,6 +1290,7 @@ pytz = "*" name = "djangorestframework-filters" version = "1.0.0.dev2" description = "Better filtering for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1237,6 +1306,7 @@ djangorestframework = "*" name = "djangorestframework-stubs" version = "3.14.0" description = "PEP-484 stubs for django-rest-framework" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1261,6 +1331,7 @@ markdown = ["types-Markdown (>=0.1.5)"] name = "djangorestframework-xml" version = "2.0.0" description = "XML support for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1280,6 +1351,7 @@ tests = ["Django (>=1.6)", "djangorestframework (>=2.4.3)", "flake8", "pytest", name = "docopt" version = "0.6.2" description = "Pythonic argument parser, that will make you smile" +category = "main" optional = false python-versions = "*" files = [ @@ -1288,19 +1360,21 @@ files = [ [[package]] name = "drf-dynamic-fields" -version = "0.3.1" +version = "0.4.0" description = "Dynamically return subset of Django REST Framework serializer fields" +category = "main" optional = false python-versions = "*" files = [ - {file = "drf_dynamic_fields-0.3.1-py2.py3-none-any.whl", hash = "sha256:fa5a7ea010476184d776b4b977d57d0090e651e8f897d83ed0c2f2bca9cbf704"}, - {file = "drf_dynamic_fields-0.3.1.tar.gz", hash = "sha256:de75969abff74332f339d082931f1815dc91c2ff1ed6e741bd33d1d5057dceb1"}, + {file = "drf_dynamic_fields-0.4.0-py2.py3-none-any.whl", hash = "sha256:48b879fe899905bc18593a61bca43e3b595dc3431b3b4ee499a9fd6c9a53f98c"}, + {file = "drf_dynamic_fields-0.4.0.tar.gz", hash = "sha256:f20a5ec27d003db7595c9315db22217493dcaed575f3811d3e12f264c791c20c"}, ] [[package]] name = "elasticsearch" version = "7.17.9" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1322,6 +1396,7 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "elasticsearch-dsl" version = "7.4.0" description = "Python client for Elasticsearch" +category = "main" 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1341,6 +1416,7 @@ develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytes name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1355,6 +1431,7 @@ test = ["pytest (>=6)"] name = "executing" version = "1.1.0" description = "Get the currently executing AST node of a frame, and other information" +category = "main" optional = false python-versions = "*" files = [ @@ -1369,6 +1446,7 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "exrex" version = "0.11.0" description = "Irregular methods for regular expressions" +category = "dev" optional = false python-versions = "*" files = [ @@ -1380,6 +1458,7 @@ files = [ name = "eyecite" version = "2.4.0" description = "Tool for extracting legal citations from text strings." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1399,6 +1478,7 @@ reporters-db = ">=3.2.2,<4.0.0" name = "factory-boy" version = "3.2.1" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1417,6 +1497,7 @@ doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] name = "faker" version = "13.3.1" description = "Faker is a Python package that generates fake data for you." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1431,6 +1512,7 @@ python-dateutil = ">=2.4" name = "fast-diff-match-patch" version = "2.0.1" description = "fast_diff_match_patch: Python package wrapping the C++ implementation of google-diff-match-patch" +category = "main" optional = false python-versions = "*" files = [ @@ -1506,6 +1588,7 @@ files = [ name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1520,6 +1603,7 @@ sgmllib3k = "*" name = "filelock" version = "3.9.0" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1619,7 @@ testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pyt name = "flake8" version = "6.0.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -1551,6 +1636,7 @@ pyflakes = ">=3.0.0,<3.1.0" name = "flynt" version = "0.78" description = "CLI tool to convert a python project's %-formatted strings to f-strings." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1569,6 +1655,7 @@ dev = ["build", "pre-commit", "pytest", "pytest-cov", "twine"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1579,6 +1666,7 @@ files = [ name = "fuzzywuzzy" version = "0.18.0" description = "Fuzzy string matching in python" +category = "main" optional = false python-versions = "*" files = [ @@ -1593,6 +1681,7 @@ speedup = ["python-levenshtein (>=0.12)"] name = "geonamescache" version = "1.6.0" description = "Geonames data for continents, cities and US states." 
+category = "main" optional = false python-versions = "*" files = [ @@ -1604,6 +1693,7 @@ files = [ name = "gunicorn" version = "20.1.0" description = "WSGI HTTP Server for UNIX" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1624,6 +1714,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.13.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1635,6 +1726,7 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1656,6 +1748,7 @@ lxml = ["lxml"] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1670,6 +1763,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httptools" version = "0.5.0" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -1723,6 +1817,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1737,6 +1832,7 @@ idna = ">=2.5" name = "identify" version = "2.5.17" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1751,6 +1847,7 @@ license = ["ukkonen"] name = "idna" version = "2.10" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1762,6 +1859,7 @@ files = [ name = "igraph" version = "0.10.4" description = "High performance graph data structures and algorithms" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1828,6 +1926,7 @@ test-musl = ["networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"] name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1843,6 +1942,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1854,6 +1954,7 @@ files = [ name = "internetarchive" version = "3.3.0" description = "A Python interface to archive.org." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1879,6 +1980,7 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7. 
name = "ipaddress" version = "1.0.23" description = "IPv4/IPv6 manipulation library" +category = "main" optional = false python-versions = "*" files = [ @@ -1890,6 +1992,7 @@ files = [ name = "ipython" version = "8.10.0" description = "IPython: Productive Interactive Computing" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1928,6 +2031,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isort" version = "5.8.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1944,6 +2048,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itypes" version = "1.2.0" description = "Simple immutable types for python." +category = "main" optional = false python-versions = "*" files = [ @@ -1955,6 +2060,7 @@ files = [ name = "jedi" version = "0.18.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1973,6 +2079,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "2.11.3" description = "A very fast and expressive template engine." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1990,6 +2097,7 @@ i18n = ["Babel (>=0.8)"] name = "jmespath" version = "0.10.0" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2001,6 +2109,7 @@ files = [ name = "jsonpatch" version = "1.32" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2015,6 +2124,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.1" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2026,6 +2136,7 @@ files = [ name = "judge-pics" version = "2.0.2" description = "Database of Judge Pictures" +category = "main" optional = false python-versions = "*" files = [ @@ -2045,6 +2156,7 @@ requests = ">=2.0,<3.0" name = "juriscraper" version = "2.5.49" description = "An API to scrape American court websites for metadata." +category = "main" optional = false python-versions = "*" files = [ @@ -2072,6 +2184,7 @@ tldextract = "*" name = "kdtree" version = "0.16" description = "A Python implemntation of a kd-tree" +category = "main" optional = false python-versions = "*" files = [ @@ -2083,6 +2196,7 @@ files = [ name = "kombu" version = "5.3.0" description = "Messaging library for Python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2229,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "lazy-object-proxy" version = "1.6.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2146,6 +2261,7 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2231,6 +2347,7 @@ source = ["Cython (>=0.29.7)"] name = "lxml-stubs" version = "0.4.0" description = "Type annotations for the lxml package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2245,6 +2362,7 @@ test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1. name = "markdown2" version = "2.4.0" description = "A fast and complete Python implementation of Markdown" +category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -2256,6 +2374,7 @@ files = [ name = "markupsafe" version = "1.1.1" description = "Safely add untrusted strings to HTML/XML markup." +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" files = [ @@ -2317,6 +2436,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2331,6 +2451,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2342,6 +2463,7 @@ files = [ name = "mypy" version = "1.2.0" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2388,6 +2510,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2399,6 +2522,7 @@ files = [ name = "nameparser" version = "1.1.1" description = "A simple Python module for parsing human names into their individual components." +category = "main" optional = false python-versions = "*" files = [ @@ -2410,6 +2534,7 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2425,8 +2550,9 @@ icu = ["PyICU (>=1.0.0)"] name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" +category = "main" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -2441,6 +2567,7 @@ PyOpenSSL = "*" name = "networkx" version = "3.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2459,6 +2586,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2473,6 +2601,7 @@ setuptools = "*" name = "nose" version = "1.3.7" description = "nose extends unittest to make testing easier" +category = "main" optional = false python-versions = "*" files = [ @@ -2485,6 +2614,7 @@ files = [ name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2522,6 +2652,7 @@ files = [ name = "openapi-codec" version = "1.3.2" description = "An OpenAPI codec for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -2535,6 +2666,7 @@ coreapi = ">=2.2.0" name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2549,6 +2681,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2560,6 +2693,7 @@ files = [ name = "pandas" version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2604,6 +2738,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2619,6 +2754,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2630,6 +2766,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." 
+category = "main" optional = false python-versions = "*" files = [ @@ -2644,6 +2781,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "main" optional = false python-versions = "*" files = [ @@ -2655,6 +2793,7 @@ files = [ name = "pillow" version = "9.3.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2729,6 +2868,7 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "2.5.1" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2744,6 +2884,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "0.13.1" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2758,6 +2899,7 @@ dev = ["pre-commit", "tox"] name = "pre-commit" version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2776,6 +2918,7 @@ virtualenv = ">=20.10.0" name = "probableparsing" version = "0.0.1" description = "Common methods for propbable parsers" +category = "main" optional = false python-versions = "*" files = [ @@ -2787,6 +2930,7 @@ files = [ name = "prompt-toolkit" version = "3.0.31" description = "Library for building powerful interactive command lines in Python" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -2801,6 +2945,7 @@ wcwidth = "*" name = "psycopg2" version = "2.9.5" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2823,6 +2968,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -2834,6 +2980,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "main" optional = false python-versions = "*" files = [ @@ -2848,6 +2995,7 @@ tests = ["pytest"] name = "pyahocorasick" version = "1.4.2" description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." 
+category = "main" optional = false python-versions = "*" files = [ @@ -2858,6 +3006,7 @@ files = [ name = "pyasn1" version = "0.4.8" description = "ASN.1 types and codecs" +category = "main" optional = false python-versions = "*" files = [ @@ -2869,6 +3018,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2883,6 +3033,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.10.0" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2894,6 +3045,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2905,6 +3057,7 @@ files = [ name = "pyflakes" version = "3.0.1" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2916,6 +3069,7 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2930,6 +3084,7 @@ plugins = ["importlib-metadata"] name = "pylint" version = "2.17.3" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -2958,6 +3113,7 @@ testutils = ["gitpython (>3)"] name = "pyopenssl" version = "20.0.1" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -2977,6 +3133,7 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "2.4.7" description = "Python parsing module" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2988,6 +3145,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3000,6 +3158,7 @@ files = [ name = "pystemmer" version = "2.0.1" description = "Snowball stemming algorithms, for information retrieval" +category = "main" optional = false python-versions = "*" files = [ @@ -3010,6 +3169,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3032,6 +3192,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." 
+category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3050,6 +3211,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "python-crfsuite" version = "0.9.9" description = "Python binding for CRFsuite" +category = "main" optional = false python-versions = "*" files = [ @@ -3100,6 +3262,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3114,6 +3277,7 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3128,6 +3292,7 @@ cli = ["click (>=5.0)"] name = "python-levenshtein" version = "0.12.2" description = "Python extension for computing string edit distances and similarities." +category = "main" optional = false python-versions = "*" files = [ @@ -3141,6 +3306,7 @@ setuptools = "*" name = "python-magic" version = "0.4.22" description = "File type identification using libmagic" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3152,6 +3318,7 @@ files = [ name = "python-stdnum" version = "1.16" description = "Python module to handle standardized numbers and codes" +category = "main" optional = false python-versions = "*" files = [ @@ -3168,6 +3335,7 @@ soap-fallback = ["PySimpleSOAP"] name = "pytz" version = "2021.1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -3179,6 +3347,7 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3193,6 +3362,7 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "5.4.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3231,6 +3401,7 @@ files = [ name = "redis" version = "3.5.3" description = "Python client for Redis key-value store" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3245,6 +3416,7 @@ hiredis = ["hiredis (>=0.1.3)"] name = "regex" version = "2022.1.18" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = "*" files = [ @@ -3328,6 +3500,7 @@ files = [ name = "reporters-db" version = "3.2.36" description = "Database of Court Reporters" +category = "main" optional = false python-versions = "*" files = [ @@ -3342,6 +3515,7 @@ six = ">=1.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3363,6 +3537,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-file" version = "1.5.1" description = "File transport adapter for Requests" +category = "main" optional = false python-versions = "*" files = [ @@ -3378,6 +3553,7 @@ six = "*" name = "s3transfer" version = "0.3.6" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = "*" files = [ @@ -3392,6 +3568,7 @@ botocore = ">=1.12.36,<2.0a.0" name = "schema" version = "0.7.4" description = "Simple data validation library" +category = "main" optional = false python-versions = "*" files = [ @@ -3406,6 +3583,7 @@ contextlib2 = ">=0.5.5" name = "scipy" version = "1.10.1" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = "<3.12,>=3.8" files = [ @@ -3444,6 +3622,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "scorched" version = "0.13.1.dev0" description = "" +category = "main" optional = false python-versions = "*" files = [] @@ -3466,6 +3645,7 @@ resolved_reference = "0632024e72e22a71e17cdb778805561f7cdd33d8" name = "selenium" version = "4.9.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3483,6 +3663,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "sentry-sdk" version = "1.14.0" description = "Python client for Sentry (https://sentry.io)" +category = "main" optional = false python-versions = "*" files = [ @@ -3521,6 +3702,7 @@ tornado = ["tornado (>=5)"] name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -3545,6 +3727,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "65.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3561,6 +3744,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" files = [ @@ -3571,6 +3755,7 @@ files = [ name = "simplejson" version = "3.18.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3653,6 +3838,7 @@ files = [ name = "six" version = "1.15.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3664,6 +3850,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3675,6 +3862,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -3686,6 +3874,7 @@ files = [ name = "soupsieve" version = "2.2.1" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3697,6 +3886,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3713,6 +3903,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.5.1" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "main" optional = false python-versions = "*" files = [ @@ -3732,6 +3923,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "stripe" version = "5.2.0" description = "Python bindings for the Stripe API" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3746,6 +3938,7 @@ requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} name = "tblib" version = "1.7.0" description = "Traceback serialization library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3757,6 +3950,7 @@ files = [ name = "texttable" version = "1.6.4" description = "module for creating simple ASCII tables" +category = "main" optional = false python-versions = "*" files = [ @@ -3768,6 +3962,7 @@ files = [ name = "time-machine" version = "2.9.0" description = "Travel through time in your tests." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3833,6 +4028,7 @@ python-dateutil = "*" name = "timeout-decorator" version = "0.5.0" description = "Timeout decorator" +category = "main" optional = false python-versions = "*" files = [ @@ -3843,6 +4039,7 @@ files = [ name = "tldextract" version = "3.4.0" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3860,6 +4057,7 @@ requests-file = ">=1.4" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4069,7 @@ files = [ name = "tomlkit" version = "0.11.8" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3882,6 +4081,7 @@ files = [ name = "tqdm" version = "4.59.0" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3898,6 +4098,7 @@ telegram = ["requests"] name = "traitlets" version = "5.4.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3912,6 +4113,7 @@ test = ["pre-commit", "pytest"] name = "trio" version = "0.21.0" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3932,6 +4134,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.9.2" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3948,6 +4151,7 @@ wsproto = ">=0.14" name = "twisted" version = "22.4.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.6.7" files = [ @@ -3988,6 +4192,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." +category = "main" optional = false python-versions = "*" files = [ @@ -4013,6 +4218,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4029,6 +4235,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "types-dateparser" version = "1.1.4.6" description = "Typing stubs for dateparser" +category = "main" optional = false python-versions = "*" files = [ @@ -4040,6 +4247,7 @@ files = [ name = "types-pyopenssl" version = "23.0.0.4" description = "Typing stubs for pyOpenSSL" +category = "dev" optional = false python-versions = "*" files = [ @@ -4054,6 +4262,7 @@ cryptography = ">=35.0.0" name = "types-python-dateutil" version = "2.8.19.12" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4065,6 +4274,7 @@ files = [ name = "types-pytz" version = "2021.3.5" description = "Typing stubs for pytz" +category = "dev" optional = false python-versions = "*" files = [ @@ -4076,6 +4286,7 @@ files = [ name = "types-pyyaml" version = "6.0.4" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -4087,6 +4298,7 @@ files = [ name = "types-redis" version = "4.5.4.1" description = "Typing stubs for redis" +category = "dev" optional = false python-versions = "*" files = [ @@ -4102,6 +4314,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.29.0.0" description = "Typing stubs for requests" +category = "dev" optional = false python-versions = "*" files = [ @@ -4116,6 +4329,7 @@ types-urllib3 = "<1.27" name = "types-simplejson" version = "3.19.0.0" description = "Typing stubs for simplejson" +category = "dev" optional = false 
python-versions = "*" files = [ @@ -4127,6 +4341,7 @@ files = [ name = "types-urllib3" version = "1.26.11" description = "Typing stubs for urllib3" +category = "dev" optional = false python-versions = "*" files = [ @@ -4138,6 +4353,7 @@ files = [ name = "typing-extensions" version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4149,6 +4365,7 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -4160,6 +4377,7 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4179,6 +4397,7 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "unidecode" version = "1.2.0" description = "ASCII transliterations of Unicode text" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4190,6 +4409,7 @@ files = [ name = "uritemplate" version = "3.0.1" description = "URI templates" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4201,6 +4421,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4220,6 +4441,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "usaddress" version = "0.5.10" description = "Parse US addresses using conditional random fields" +category = "main" optional = false python-versions = "*" files = [ @@ -4236,6 +4458,7 @@ python-crfsuite = ">=0.7" name = "uvicorn" version = "0.22.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4250,7 +4473,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -4261,6 +4484,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.17.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4305,6 +4529,7 @@ test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "my name = "vine" version = "5.0.0" description = "Promises, promises, promises." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4316,6 +4541,7 @@ files = [ name = "virtualenv" version = "20.17.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4336,6 +4562,7 @@ testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7 name = "watchfiles" version = "0.19.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4370,6 +4597,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.5" description = "Measures the displayed width of unicode strings in a terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -4381,6 +4609,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "main" optional = false python-versions = "*" files = [ @@ -4392,6 +4621,7 @@ files = [ name = "websockets" version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4471,6 +4701,7 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -4555,6 +4786,7 @@ files = [ name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4569,6 +4801,7 @@ h11 = ">=0.9.0,<1" name = "zope-interface" version = "6.0" description = "Interfaces for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ From e067b3bcd89b42a0409239de3966e667acda6b33 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:08:40 +0000 Subject: [PATCH 004/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e531956054..e9052f9eaf 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This repository is organized in the following way: - scripts: logrotate, systemd, etc, and init scripts for our various configurations and daemons. -## Getting Involved +## Getting Involved If you want to get involved send us an email with your contact info or take a look through the [issues list][issues]. There are innumerable things we need help with, but we especially are looking for help with: From 661a8786def6bb2ce751dfc236906e6954b7bba9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:09:25 +0000 Subject: [PATCH 005/372] build(deps): bump django-ses from 3.3.0 to 3.5.0 Bumps [django-ses](https://github.com/django-ses/django-ses) from 3.3.0 to 3.5.0. - [Release notes](https://github.com/django-ses/django-ses/releases) - [Changelog](https://github.com/django-ses/django-ses/blob/master/CHANGES.md) - [Commits](https://github.com/django-ses/django-ses/compare/v3.3.0...v3.5.0) --- updated-dependencies: - dependency-name: django-ses dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 251 +++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- 2 files changed, 243 insertions(+), 10 deletions(-) diff --git a/poetry.lock b/poetry.lock index d923189ec5..c316342e1d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "amqp" version = "5.1.1" description = "Low-level AMQP client for Python (fork of amqplib)." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -18,6 +19,7 @@ vine = ">=5.0.0" name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -38,6 +40,7 @@ trio = ["trio (>=0.16,<0.22)"] name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "main" optional = false python-versions = "*" files = [ @@ -49,6 +52,7 @@ files = [ name = "argparse" version = "1.4.0" description = "Python command-line parsing library" +category = "main" optional = false python-versions = "*" files = [ @@ -60,6 +64,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -74,6 +79,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -85,6 +91,7 @@ files = [ name = "astroid" version = "2.15.4" description = "An abstract syntax tree for Python with inference support." +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -104,6 +111,7 @@ wrapt = [ name = "asttokens" version = "2.0.8" description = "Annotate AST trees with source code positions" +category = "main" optional = false python-versions = "*" files = [ @@ -121,6 +129,7 @@ test = ["astroid (<=2.5.3)", "pytest"] name = "async-generator" version = "1.10" description = "Async generators and context managers for Python 3.5+" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -132,6 +141,7 @@ files = [ name = "attrs" version = "20.3.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -149,6 +159,7 @@ tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (> name = "autobahn" version = "23.1.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -177,6 +188,7 @@ xbr = ["base58 (>=2.1.0)", "cbor2 (>=5.2.0)", "click (>=8.1.2)", "ecdsa (>=0.16. name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." 
+category = "main" optional = false python-versions = "*" files = [ @@ -195,6 +207,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "backcall" version = "0.2.0" description = "Specifications for callback functions passed in to an API" +category = "main" optional = false python-versions = "*" files = [ @@ -206,6 +219,7 @@ files = [ name = "beautifulsoup4" version = "4.11.2" description = "Screen-scraping library" +category = "main" optional = false python-versions = ">=3.6.0" files = [ @@ -224,6 +238,7 @@ lxml = ["lxml"] name = "billiard" version = "4.1.0" description = "Python multiprocessing fork with improvements and bugfixes" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -235,6 +250,7 @@ files = [ name = "black" version = "23.3.0" description = "The uncompromising code formatter." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -283,6 +299,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "boto3" version = "1.17.43" description = "The AWS SDK for Python" +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -299,6 +316,7 @@ s3transfer = ">=0.3.0,<0.4.0" name = "botocore" version = "1.20.43" description = "Low-level, data-driven core of boto 3." +category = "main" optional = false python-versions = ">= 2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -318,6 +336,7 @@ crt = ["awscrt (==0.10.8)"] name = "celery" version = "5.3.0" description = "Distributed Task Queue." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -373,6 +392,7 @@ zstd = ["zstandard (==0.21.0)"] name = "certifi" version = "2022.12.7" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -384,6 +404,7 @@ files = [ name = "cffi" version = "1.14.5" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -445,6 +466,7 @@ pycparser = "*" name = "cfgv" version = "3.3.1" description = "Validate configuration and produce human readable error messages." +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -456,6 +478,7 @@ files = [ name = "chardet" version = "5.1.0" description = "Universal encoding detector for Python 3" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -467,6 +490,7 @@ files = [ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -551,6 +575,7 @@ files = [ name = "click" version = "8.1.2" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -565,6 +590,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.0" description = "Enables git-like *did-you-mean* feature in click" +category = "main" optional = false python-versions = ">=3.6.2,<4.0.0" files = [ @@ -579,6 +605,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." 
+category = "main" optional = false python-versions = "*" files = [ @@ -596,6 +623,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.2.0" description = "REPL plugin for Click" +category = "main" optional = false python-versions = "*" files = [ @@ -612,6 +640,7 @@ six = "*" name = "climage" version = "0.1.3" description = "Convert images to beautiful ANSI escape codes" +category = "main" optional = false python-versions = ">=3.2" files = [ @@ -627,6 +656,7 @@ Pillow = "*" name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -638,6 +668,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -649,6 +680,7 @@ files = [ name = "contextlib2" version = "0.6.0.post1" description = "Backports and enhancements for the contextlib module" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -660,6 +692,7 @@ files = [ name = "coreapi" version = "2.3.3" description = "Python client library for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -677,6 +710,7 @@ uritemplate = "*" name = "coreschema" version = "0.0.4" description = "Core Schema." +category = "main" optional = false python-versions = "*" files = [ @@ -691,6 +725,7 @@ jinja2 = "*" name = "courts-db" version = "0.10.9" description = "Database of Courts" +category = "main" optional = false python-versions = "*" files = [ @@ -702,6 +737,7 @@ files = [ name = "cryptography" version = "36.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -742,6 +778,7 @@ test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0 name = "cssselect" version = "1.2.0" description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -753,6 +790,7 @@ files = [ name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -772,6 +810,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "datasketch" version = "1.5.7" description = "Probabilistic data structures for processing and searching very large datasets" +category = "main" optional = false python-versions = "*" files = [ @@ -794,6 +833,7 @@ test = ["cassandra-driver (>=3.20)", "coverage", "mock (>=2.0.0)", "mockredispy" name = "dateparser" version = "1.1.8" description = "Date parsing library designed to parse dates from HTML pages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -816,6 +856,7 @@ langdetect = ["langdetect"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -827,6 +868,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -838,6 +880,7 @@ files = [ name = "dill" version = "0.3.6" description = "serialize all of python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -852,6 +895,7 @@ graph = ["objgraph (>=1.7.2)"] name = "disposable-email-domains" version = "0.0.64" description = "A set of disposable email domains" +category = "main" optional = false python-versions = "*" files = [ @@ -866,6 +910,7 @@ dev = ["check-manifest"] name = "distlib" version = "0.3.6" description = "Distribution utilities" +category = "dev" optional = false python-versions = "*" files = [ @@ -877,6 +922,7 @@ files = [ name = "django" version = "4.2.1" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -897,6 +943,7 @@ bcrypt = ["bcrypt"] name = "django-admin-cursor-paginator" version = "0.1.2" description = "Drop-in replacement for django admin default pagination that works fast with huge tables." +category = "main" optional = false python-versions = ">=3.4" files = [ @@ -911,6 +958,7 @@ Django = ">=2.0" name = "django-cache-memoize" version = "0.1.8" description = "Django utility for a memoization decorator that uses the Django cache framework." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -925,6 +973,7 @@ dev = ["black", "flake8", "therapist", "tox", "twine"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -939,6 +988,7 @@ Django = ">=3.2" name = "django-debug-toolbar" version = "4.0.0" description = "A configurable set of panels that display various debug information about the current request/response." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -954,6 +1004,7 @@ sqlparse = ">=0.2" name = "django-elasticsearch-dsl" version = "7.3" description = "Wrapper around elasticsearch-dsl-py for django models" +category = "main" optional = false python-versions = "*" files = [ @@ -969,6 +1020,7 @@ six = "*" name = "django-environ" version = "0.8.1" description = "A package that allows you to utilize 12factor inspired environment variables to configure your Django application." +category = "main" optional = false python-versions = ">=3.4,<4" files = [ @@ -977,14 +1029,15 @@ files = [ ] [package.extras] -develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.dev0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] -docs = ["furo (>=2021.8.17b43,<2021.9.dev0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +develop = ["coverage[toml] (>=5.0a4)", "furo (>=2021.8.17b43,<2021.9.0)", "pytest (>=4.6.11)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] +docs = ["furo (>=2021.8.17b43,<2021.9.0)", "sphinx (>=3.5.0)", "sphinx-notfound-page"] testing = ["coverage[toml] (>=5.0a4)", "pytest (>=4.6.11)"] [[package]] name = "django-extensions" version = "3.2.1" description = "Extensions for Django" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,6 +1052,7 @@ Django = ">=3.2" name = "django-filter" version = "2.4.0" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1013,6 +1067,7 @@ Django = ">=2.2" name = "django-hcaptcha" version = "0.2.0" description = "Django hCaptcha provides a simple way to protect your django forms using hCaptcha" +category = "main" optional = false python-versions = "*" files = [ @@ -1024,6 +1079,7 @@ files = [ name = "django-localflavor" version = "3.1" description = "Country-specific Django helpers" +category = "main" optional = false python-versions = "*" files = [ @@ -1039,6 +1095,7 @@ python-stdnum = ">=1.6" name = "django-markdown-deux" version = "1.0.6" description = "a Django app that provides template tags for using Markdown (using the python-markdown2 processor)" +category = "main" optional = false python-versions = "*" files = [ @@ -1052,6 +1109,7 @@ markdown2 = "*" name = "django-mathfilters" version = "1.0.0" description = "A set of simple math filters for Django" +category = "main" optional = false python-versions = "*" files = [ @@ -1063,6 +1121,7 @@ files = [ name = "django-override-storage" version = "0.3.2" description = "Django test helpers to manage file storage side effects." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1074,6 +1133,7 @@ files = [ name = "django-pghistory" version = "2.7.0" description = "History tracking for Django and Postgres" +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1089,6 +1149,7 @@ django-pgtrigger = ">=4.5.0" name = "django-pgtrigger" version = "4.6.0" description = "Postgres trigger support integrated with Django models." +category = "main" optional = false python-versions = ">=3.7.0,<4" files = [ @@ -1103,6 +1164,7 @@ django = ">=2" name = "django-ratelimit" version = "4.0.0" description = "Cache-based rate-limiting for Django." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1112,13 +1174,14 @@ files = [ [[package]] name = "django-ses" -version = "3.3.0" +version = "3.5.0" description = "A Django email backend for Amazon's Simple Email Service" +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "django_ses-3.3.0-py3-none-any.whl", hash = "sha256:029edd3f23333f09cd14b4ebc274a5c3700819f16f1fcd5a25b5639b3be0fdba"}, - {file = "django_ses-3.3.0.tar.gz", hash = "sha256:bdfc5bd4bf1f95d01756761bc9dae40b79c4c709e49ff1b9bd9e853dfd09efb1"}, + {file = "django_ses-3.5.0-py3-none-any.whl", hash = "sha256:3522fe531155eb06bb015b3b36324c059194450633b33f9bd5bc9d1328822fe2"}, + {file = "django_ses-3.5.0.tar.gz", hash = "sha256:dc1644f50608fbf3a64f085a371c61d56d68eba3c5efa69651f13dc3ba05049d"}, ] [package.dependencies] @@ -1136,6 +1199,7 @@ events = ["cryptography (>=36.0.2)", "requests (>=2.27.1)"] name = "django-storages" version = "1.13.2" description = "Support for many storage backends in Django" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1158,6 +1222,7 @@ sftp = ["paramiko (>=1.10.0)"] name = "django-stubs" version = "4.2.0" description = "Mypy stubs for Django" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1181,6 +1246,7 @@ compatible-mypy = ["mypy (>=1.2.0,<1.3)"] name = "django-stubs-ext" version = "4.2.0" description = "Monkey-patching and extensions for django-stubs" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1196,6 +1262,7 @@ typing-extensions = "*" name = "django-waffle" version = "3.0.0" description = "A feature flipper for Django." +category = "main" optional = false python-versions = "*" files = [ @@ -1207,6 +1274,7 @@ files = [ name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1222,6 +1290,7 @@ pytz = "*" name = "djangorestframework-filters" version = "1.0.0.dev2" description = "Better filtering for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1237,6 +1306,7 @@ djangorestframework = "*" name = "djangorestframework-stubs" version = "3.14.0" description = "PEP-484 stubs for django-rest-framework" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1261,6 +1331,7 @@ markdown = ["types-Markdown (>=0.1.5)"] name = "djangorestframework-xml" version = "2.0.0" description = "XML support for Django REST Framework" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1280,6 +1351,7 @@ tests = ["Django (>=1.6)", "djangorestframework (>=2.4.3)", "flake8", "pytest", name = "docopt" version = "0.6.2" description = "Pythonic argument parser, that will make you smile" +category = "main" optional = false python-versions = "*" files = [ @@ -1290,6 +1362,7 @@ files = [ name = "drf-dynamic-fields" version = "0.3.1" description = "Dynamically return subset of Django REST Framework serializer fields" +category = "main" optional = false python-versions = "*" files = [ @@ -1301,6 +1374,7 @@ files = [ name = "elasticsearch" version = "7.17.9" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -1322,6 +1396,7 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "elasticsearch-dsl" version = "7.4.0" description = "Python client for Elasticsearch" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1341,6 +1416,7 @@ develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytes name = "exceptiongroup" version = "1.1.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1355,6 +1431,7 @@ test = ["pytest (>=6)"] name = "executing" version = "1.1.0" description = "Get the currently executing AST node of a frame, and other information" +category = "main" optional = false python-versions = "*" files = [ @@ -1369,6 +1446,7 @@ tests = ["asttokens", "littleutils", "pytest", "rich"] name = "exrex" version = "0.11.0" description = "Irregular methods for regular expressions" +category = "dev" optional = false python-versions = "*" files = [ @@ -1380,6 +1458,7 @@ files = [ name = "eyecite" version = "2.4.0" description = "Tool for extracting legal citations from text strings." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1399,6 +1478,7 @@ reporters-db = ">=3.2.2,<4.0.0" name = "factory-boy" version = "3.2.1" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1417,6 +1497,7 @@ doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] name = "faker" version = "13.3.1" description = "Faker is a Python package that generates fake data for you." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1431,6 +1512,7 @@ python-dateutil = ">=2.4" name = "fast-diff-match-patch" version = "2.0.1" description = "fast_diff_match_patch: Python package wrapping the C++ implementation of google-diff-match-patch" +category = "main" optional = false python-versions = "*" files = [ @@ -1506,6 +1588,7 @@ files = [ name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1520,6 +1603,7 @@ sgmllib3k = "*" name = "filelock" version = "3.9.0" description = "A platform independent file lock." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1619,7 @@ testing = ["covdefaults (>=2.2.2)", "coverage (>=7.0.1)", "pytest (>=7.2)", "pyt name = "flake8" version = "6.0.0" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -1551,6 +1636,7 @@ pyflakes = ">=3.0.0,<3.1.0" name = "flynt" version = "0.78" description = "CLI tool to convert a python project's %-formatted strings to f-strings." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1569,6 +1655,7 @@ dev = ["build", "pre-commit", "pytest", "pytest-cov", "twine"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1579,6 +1666,7 @@ files = [ name = "fuzzywuzzy" version = "0.18.0" description = "Fuzzy string matching in python" +category = "main" optional = false python-versions = "*" files = [ @@ -1593,6 +1681,7 @@ speedup = ["python-levenshtein (>=0.12)"] name = "geonamescache" version = "1.6.0" description = "Geonames data for continents, cities and US states." +category = "main" optional = false python-versions = "*" files = [ @@ -1604,6 +1693,7 @@ files = [ name = "gunicorn" version = "20.1.0" description = "WSGI HTTP Server for UNIX" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1624,6 +1714,7 @@ tornado = ["tornado (>=0.2)"] name = "h11" version = "0.13.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1635,6 +1726,7 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1656,6 +1748,7 @@ lxml = ["lxml"] name = "httplib2" version = "0.22.0" description = "A comprehensive HTTP client library." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1670,6 +1763,7 @@ pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0 name = "httptools" version = "0.5.0" description = "A collection of framework independent HTTP protocol utils." +category = "main" optional = false python-versions = ">=3.5.0" files = [ @@ -1723,6 +1817,7 @@ test = ["Cython (>=0.29.24,<0.30.0)"] name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." 
+category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1737,6 +1832,7 @@ idna = ">=2.5" name = "identify" version = "2.5.17" description = "File identification library for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1751,6 +1847,7 @@ license = ["ukkonen"] name = "idna" version = "2.10" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1762,6 +1859,7 @@ files = [ name = "igraph" version = "0.10.4" description = "High performance graph data structures and algorithms" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1828,6 +1926,7 @@ test-musl = ["networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"] name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1843,6 +1942,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1854,6 +1954,7 @@ files = [ name = "internetarchive" version = "3.3.0" description = "A Python interface to archive.org." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1879,6 +1980,7 @@ types = ["tqdm-stubs (>=0.2.0)", "types-colorama", "types-docopt (>=0.6.10,<0.7. name = "ipaddress" version = "1.0.23" description = "IPv4/IPv6 manipulation library" +category = "main" optional = false python-versions = "*" files = [ @@ -1890,6 +1992,7 @@ files = [ name = "ipython" version = "8.10.0" description = "IPython: Productive Interactive Computing" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1928,6 +2031,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isort" version = "5.8.0" description = "A Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.6,<4.0" files = [ @@ -1944,6 +2048,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "itypes" version = "1.2.0" description = "Simple immutable types for python." +category = "main" optional = false python-versions = "*" files = [ @@ -1955,6 +2060,7 @@ files = [ name = "jedi" version = "0.18.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1973,6 +2079,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "2.11.3" description = "A very fast and expressive template engine." 
+category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1990,6 +2097,7 @@ i18n = ["Babel (>=0.8)"] name = "jmespath" version = "0.10.0" description = "JSON Matching Expressions" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2001,6 +2109,7 @@ files = [ name = "jsonpatch" version = "1.32" description = "Apply JSON-Patches (RFC 6902)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2015,6 +2124,7 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.1" description = "Identify specific nodes in a JSON document (RFC 6901)" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2026,6 +2136,7 @@ files = [ name = "judge-pics" version = "2.0.2" description = "Database of Judge Pictures" +category = "main" optional = false python-versions = "*" files = [ @@ -2045,6 +2156,7 @@ requests = ">=2.0,<3.0" name = "juriscraper" version = "2.5.49" description = "An API to scrape American court websites for metadata." +category = "main" optional = false python-versions = "*" files = [ @@ -2072,6 +2184,7 @@ tldextract = "*" name = "kdtree" version = "0.16" description = "A Python implemntation of a kd-tree" +category = "main" optional = false python-versions = "*" files = [ @@ -2083,6 +2196,7 @@ files = [ name = "kombu" version = "5.3.0" description = "Messaging library for Python." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2115,6 +2229,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "lazy-object-proxy" version = "1.6.0" description = "A fast and thorough lazy object proxy." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -2146,6 +2261,7 @@ files = [ name = "lxml" version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2231,6 +2347,7 @@ source = ["Cython (>=0.29.7)"] name = "lxml-stubs" version = "0.4.0" description = "Type annotations for the lxml package" +category = "dev" optional = false python-versions = "*" files = [ @@ -2245,6 +2362,7 @@ test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1. name = "markdown2" version = "2.4.0" description = "A fast and complete Python implementation of Markdown" +category = "main" optional = false python-versions = ">=3.5, <4" files = [ @@ -2256,6 +2374,7 @@ files = [ name = "markupsafe" version = "1.1.1" description = "Safely add untrusted strings to HTML/XML markup." 
+category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" files = [ @@ -2317,6 +2436,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2331,6 +2451,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2342,6 +2463,7 @@ files = [ name = "mypy" version = "1.2.0" description = "Optional static typing for Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2388,6 +2510,7 @@ reports = ["lxml"] name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2399,6 +2522,7 @@ files = [ name = "nameparser" version = "1.1.1" description = "A simple Python module for parsing human names into their individual components." +category = "main" optional = false python-versions = "*" files = [ @@ -2410,6 +2534,7 @@ files = [ name = "natsort" version = "8.3.1" description = "Simple yet flexible natural sorting in Python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2425,8 +2550,9 @@ icu = ["PyICU (>=1.0.0)"] name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" +category = "main" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -2441,6 +2567,7 @@ PyOpenSSL = "*" name = "networkx" version = "3.1" description = "Python package for creating and manipulating graphs and networks" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2459,6 +2586,7 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nodeenv" version = "1.7.0" description = "Node.js virtual environment builder" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -2473,6 +2601,7 @@ setuptools = "*" name = "nose" version = "1.3.7" description = "nose extends unittest to make testing easier" +category = "main" optional = false python-versions = "*" files = [ @@ -2485,6 +2614,7 @@ files = [ name = "numpy" version = "1.24.2" description = "Fundamental package for array computing in Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2522,6 +2652,7 @@ files = [ name = "openapi-codec" version = "1.3.2" description = "An OpenAPI codec for Core API." +category = "main" optional = false python-versions = "*" files = [ @@ -2535,6 +2666,7 @@ coreapi = ">=2.2.0" name = "outcome" version = "1.2.0" description = "Capture the outcome of Python function calls." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2549,6 +2681,7 @@ attrs = ">=19.2.0" name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2560,6 +2693,7 @@ files = [ name = "pandas" version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2604,6 +2738,7 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2619,6 +2754,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.9.0" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -2630,6 +2766,7 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." +category = "main" optional = false python-versions = "*" files = [ @@ -2644,6 +2781,7 @@ ptyprocess = ">=0.5" name = "pickleshare" version = "0.7.5" description = "Tiny 'shelve'-like database with concurrency support" +category = "main" optional = false python-versions = "*" files = [ @@ -2655,6 +2793,7 @@ files = [ name = "pillow" version = "9.3.0" description = "Python Imaging Library (Fork)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2729,6 +2868,7 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "2.5.1" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2744,6 +2884,7 @@ test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock name = "pluggy" version = "0.13.1" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2758,6 +2899,7 @@ dev = ["pre-commit", "tox"] name = "pre-commit" version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2776,6 +2918,7 @@ virtualenv = ">=20.10.0" name = "probableparsing" version = "0.0.1" description = "Common methods for propbable parsers" +category = "main" optional = false python-versions = "*" files = [ @@ -2787,6 +2930,7 @@ files = [ name = "prompt-toolkit" version = "3.0.31" description = "Library for building powerful interactive command lines in Python" +category = "main" optional = false python-versions = ">=3.6.2" files = [ @@ -2801,6 +2945,7 @@ wcwidth = "*" name = "psycopg2" version = "2.9.5" description = "psycopg2 - Python-PostgreSQL Database Adapter" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2823,6 +2968,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -2834,6 +2980,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "main" optional = false python-versions = "*" files = [ @@ -2848,6 +2995,7 @@ tests = ["pytest"] name = "pyahocorasick" version = "1.4.2" description = "pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ahocorasick.Automaton class, you can find multiple key strings occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. Implemented in C and tested on Python 2.7 and 3.4+. Works on Linux, Mac and Windows. BSD-3-clause license." +category = "main" optional = false python-versions = "*" files = [ @@ -2858,6 +3006,7 @@ files = [ name = "pyasn1" version = "0.4.8" description = "ASN.1 types and codecs" +category = "main" optional = false python-versions = "*" files = [ @@ -2869,6 +3018,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -2883,6 +3033,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.10.0" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2894,6 +3045,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2905,6 +3057,7 @@ files = [ name = "pyflakes" version = "3.0.1" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2916,6 +3069,7 @@ files = [ name = "pygments" version = "2.13.0" description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2930,6 +3084,7 @@ plugins = ["importlib-metadata"] name = "pylint" version = "2.17.3" description = "python code static checker" +category = "dev" optional = false python-versions = ">=3.7.2" files = [ @@ -2958,6 +3113,7 @@ testutils = ["gitpython (>3)"] name = "pyopenssl" version = "20.0.1" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -2977,6 +3133,7 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "2.4.7" description = "Python parsing module" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2988,6 +3145,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3000,6 +3158,7 @@ files = [ name = "pystemmer" version = "2.0.1" description = "Snowball stemming algorithms, for information retrieval" +category = "main" optional = false python-versions = "*" files = [ @@ -3010,6 +3169,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3032,6 +3192,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3050,6 +3211,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "python-crfsuite" version = "0.9.9" description = "Python binding for CRFsuite" +category = "main" optional = false python-versions = "*" files = [ @@ -3100,6 +3262,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -3114,6 +3277,7 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3128,6 +3292,7 @@ cli = ["click (>=5.0)"] name = "python-levenshtein" version = "0.12.2" description = "Python extension for computing string edit distances and similarities." 
+category = "main" optional = false python-versions = "*" files = [ @@ -3141,6 +3306,7 @@ setuptools = "*" name = "python-magic" version = "0.4.22" description = "File type identification using libmagic" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3152,6 +3318,7 @@ files = [ name = "python-stdnum" version = "1.16" description = "Python module to handle standardized numbers and codes" +category = "main" optional = false python-versions = "*" files = [ @@ -3168,6 +3335,7 @@ soap-fallback = ["PySimpleSOAP"] name = "pytz" version = "2021.1" description = "World timezone definitions, modern and historical" +category = "main" optional = false python-versions = "*" files = [ @@ -3179,6 +3347,7 @@ files = [ name = "pytz-deprecation-shim" version = "0.1.0.post0" description = "Shims to make deprecation of pytz easier" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -3193,6 +3362,7 @@ tzdata = {version = "*", markers = "python_version >= \"3.6\""} name = "pyyaml" version = "5.4.1" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3231,6 +3401,7 @@ files = [ name = "redis" version = "3.5.3" description = "Python client for Redis key-value store" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3245,6 +3416,7 @@ hiredis = ["hiredis (>=0.1.3)"] name = "regex" version = "2022.1.18" description = "Alternative regular expression module, to replace re." +category = "main" optional = false python-versions = "*" files = [ @@ -3328,6 +3500,7 @@ files = [ name = "reporters-db" version = "3.2.36" description = "Database of Court Reporters" +category = "main" optional = false python-versions = "*" files = [ @@ -3342,6 +3515,7 @@ six = ">=1.0.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3363,6 +3537,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-file" version = "1.5.1" description = "File transport adapter for Requests" +category = "main" optional = false python-versions = "*" files = [ @@ -3378,6 +3553,7 @@ six = "*" name = "s3transfer" version = "0.3.6" description = "An Amazon S3 Transfer Manager" +category = "main" optional = false python-versions = "*" files = [ @@ -3392,6 +3568,7 @@ botocore = ">=1.12.36,<2.0a.0" name = "schema" version = "0.7.4" description = "Simple data validation library" +category = "main" optional = false python-versions = "*" files = [ @@ -3406,6 +3583,7 @@ contextlib2 = ">=0.5.5" name = "scipy" version = "1.10.1" description = "Fundamental algorithms for scientific computing in Python" +category = "main" optional = false python-versions = "<3.12,>=3.8" files = [ @@ -3444,6 +3622,7 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo name = "scorched" version = "0.13.1.dev0" description = "" +category = "main" optional = false python-versions = "*" files = [] @@ -3466,6 +3645,7 @@ resolved_reference = "0632024e72e22a71e17cdb778805561f7cdd33d8" name = "selenium" version = "4.9.1" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3483,6 +3663,7 @@ urllib3 = {version = ">=1.26,<3", extras = ["socks"]} name = "sentry-sdk" version = "1.14.0" description = "Python client for Sentry (https://sentry.io)" +category = "main" optional = false python-versions = "*" files = [ @@ -3521,6 +3702,7 @@ tornado = ["tornado (>=5)"] name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -3545,6 +3727,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "65.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3561,6 +3744,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" files = [ @@ -3571,6 +3755,7 @@ files = [ name = "simplejson" version = "3.18.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" +category = "main" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3653,6 +3838,7 @@ files = [ name = "six" version = "1.15.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3664,6 +3850,7 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3675,6 +3862,7 @@ files = [ name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +category = "main" optional = false python-versions = "*" files = [ @@ -3686,6 +3874,7 @@ files = [ name = "soupsieve" version = "2.2.1" description = "A modern CSS selector implementation for Beautiful Soup." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3697,6 +3886,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3713,6 +3903,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.5.1" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "main" optional = false python-versions = "*" files = [ @@ -3732,6 +3923,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "stripe" version = "5.2.0" description = "Python bindings for the Stripe API" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3746,6 +3938,7 @@ requests = {version = ">=2.20", markers = "python_version >= \"3.0\""} name = "tblib" version = "1.7.0" description = "Traceback serialization library." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -3757,6 +3950,7 @@ files = [ name = "texttable" version = "1.6.4" description = "module for creating simple ASCII tables" +category = "main" optional = false python-versions = "*" files = [ @@ -3768,6 +3962,7 @@ files = [ name = "time-machine" version = "2.9.0" description = "Travel through time in your tests." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3833,6 +4028,7 @@ python-dateutil = "*" name = "timeout-decorator" version = "0.5.0" description = "Timeout decorator" +category = "main" optional = false python-versions = "*" files = [ @@ -3843,6 +4039,7 @@ files = [ name = "tldextract" version = "3.4.0" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3860,6 +4057,7 @@ requests-file = ">=1.4" name = "tomli" version = "2.0.1" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3871,6 +4069,7 @@ files = [ name = "tomlkit" version = "0.11.8" description = "Style preserving TOML library" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3882,6 +4081,7 @@ files = [ name = "tqdm" version = "4.59.0" description = "Fast, Extensible Progress Meter" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -3898,6 +4098,7 @@ telegram = ["requests"] name = "traitlets" version = "5.4.0" description = "" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3912,6 +4113,7 @@ test = ["pre-commit", "pytest"] name = "trio" version = "0.21.0" description = "A friendly Python library for async concurrency and I/O" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3932,6 +4134,7 @@ sortedcontainers = "*" name = "trio-websocket" version = "0.9.2" description = "WebSocket library for Trio" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3948,6 +4151,7 @@ wsproto = ">=0.14" name = "twisted" version = "22.4.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.6.7" files = [ @@ -3988,6 +4192,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." +category = "main" optional = false python-versions = "*" files = [ @@ -4013,6 +4218,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4029,6 +4235,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "types-dateparser" version = "1.1.4.6" description = "Typing stubs for dateparser" +category = "main" optional = false python-versions = "*" files = [ @@ -4040,6 +4247,7 @@ files = [ name = "types-pyopenssl" version = "23.0.0.4" description = "Typing stubs for pyOpenSSL" +category = "dev" optional = false python-versions = "*" files = [ @@ -4054,6 +4262,7 @@ cryptography = ">=35.0.0" name = "types-python-dateutil" version = "2.8.19.12" description = "Typing stubs for python-dateutil" +category = "dev" optional = false python-versions = "*" files = [ @@ -4065,6 +4274,7 @@ files = [ name = "types-pytz" version = "2021.3.5" description = "Typing stubs for pytz" +category = "dev" optional = false python-versions = "*" files = [ @@ -4076,6 +4286,7 @@ files = [ name = "types-pyyaml" version = "6.0.4" description = "Typing stubs for PyYAML" +category = "dev" optional = false python-versions = "*" files = [ @@ -4087,6 +4298,7 @@ files = [ name = "types-redis" version = "4.5.4.1" description = "Typing stubs for redis" +category = "dev" optional = false python-versions = "*" files = [ @@ -4102,6 +4314,7 @@ types-pyOpenSSL = "*" name = "types-requests" version = "2.29.0.0" description = "Typing stubs for requests" +category = "dev" optional = false python-versions = "*" files = [ @@ -4116,6 +4329,7 @@ types-urllib3 = "<1.27" name = "types-simplejson" version = "3.19.0.0" description = "Typing stubs for simplejson" +category = "dev" optional = false 
python-versions = "*" files = [ @@ -4127,6 +4341,7 @@ files = [ name = "types-urllib3" version = "1.26.11" description = "Typing stubs for urllib3" +category = "dev" optional = false python-versions = "*" files = [ @@ -4138,6 +4353,7 @@ files = [ name = "typing-extensions" version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4149,6 +4365,7 @@ files = [ name = "tzdata" version = "2022.7" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -4160,6 +4377,7 @@ files = [ name = "tzlocal" version = "4.2" description = "tzinfo object for the local timezone" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4179,6 +4397,7 @@ test = ["pytest (>=4.3)", "pytest-mock (>=3.3)"] name = "unidecode" version = "1.2.0" description = "ASCII transliterations of Unicode text" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4190,6 +4409,7 @@ files = [ name = "uritemplate" version = "3.0.1" description = "URI templates" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4201,6 +4421,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -4220,6 +4441,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "usaddress" version = "0.5.10" description = "Parse US addresses using conditional random fields" +category = "main" optional = false python-versions = "*" files = [ @@ -4236,6 +4458,7 @@ python-crfsuite = ">=0.7" name = "uvicorn" version = "0.22.0" description = "The lightning-fast ASGI server." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4250,7 +4473,7 @@ h11 = ">=0.8" httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} -uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""} watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} @@ -4261,6 +4484,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "uvloop" version = "0.17.0" description = "Fast implementation of asyncio event loop on top of libuv" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4305,6 +4529,7 @@ test = ["Cython (>=0.29.32,<0.30.0)", "aiohttp", "flake8 (>=3.9.2,<3.10.0)", "my name = "vine" version = "5.0.0" description = "Promises, promises, promises." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4316,6 +4541,7 @@ files = [ name = "virtualenv" version = "20.17.1" description = "Virtual Python Environment builder" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4336,6 +4562,7 @@ testing = ["coverage (>=6.2)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7 name = "watchfiles" version = "0.19.0" description = "Simple, modern and high performance file watching and code reload in python." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4370,6 +4597,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.5" description = "Measures the displayed width of unicode strings in a terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -4381,6 +4609,7 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" +category = "main" optional = false python-versions = "*" files = [ @@ -4392,6 +4621,7 @@ files = [ name = "websockets" version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4471,6 +4701,7 @@ files = [ name = "wrapt" version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -4555,6 +4786,7 @@ files = [ name = "wsproto" version = "1.2.0" description = "WebSockets state-machine based protocol implementation" +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -4569,6 +4801,7 @@ h11 = ">=0.9.0,<1" name = "zope-interface" version = "6.0" description = "Interfaces for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4615,4 +4848,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "d435d4c7dce4af1c659d4dbe2d712e2091a1514a80439b4e4009404f69bef595" +content-hash = "d4fbe083bb0ec3fbf23ad5fa55c0819f250d024c3022878c6a8c471d094bf231" diff --git a/pyproject.toml b/pyproject.toml index 5ec87e6e65..0fe25deecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,7 +95,7 @@ PyStemmer = "^2.0.1" factory-boy = "^3.2.1" django-elasticsearch-dsl = "^7.3" django-override-storage = "^0.3.2" -django-ses = {extras = ["events"], version = "^3.3.0"} +django-ses = {extras = ["events"], version = "^3.5.0"} django-environ = "^0.8.1" judge-pics = "^2.0.1" django-admin-cursor-paginator = "^0.1.2" From 23221dfc0e548d8da18c75508c98a6e5b2f2078e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 23:10:18 +0000 Subject: [PATCH 006/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e531956054..e9052f9eaf 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ This repository is organized in the following way: - scripts: logrotate, systemd, etc, and init scripts for our various configurations and daemons. -## Getting Involved +## Getting Involved If you want to get involved send us an email with your contact info or take a look through the [issues list][issues]. 
There are innumerable things we need help with, but we especially are looking for help with: From b701bc05516840bf9eb1793f304674c4085595e5 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 14:05:55 -0400 Subject: [PATCH 007/372] feat(search.models): Add django-ordered-model Add django-ordered-model Add django-ordered-model to Opinions Update poetry --- cl/search/migrations/0019_order_opinions.py | 71 ++++++++++ cl/search/migrations/0019_order_opinions.sql | 129 +++++++++++++++++++ cl/search/models.py | 4 +- cl/settings/django.py | 1 + poetry.lock | 13 +- pyproject.toml | 1 + 6 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 cl/search/migrations/0019_order_opinions.py create mode 100644 cl/search/migrations/0019_order_opinions.sql diff --git a/cl/search/migrations/0019_order_opinions.py b/cl/search/migrations/0019_order_opinions.py new file mode 100644 index 0000000000..5e446056cc --- /dev/null +++ b/cl/search/migrations/0019_order_opinions.py @@ -0,0 +1,71 @@ +# Generated by Django 4.2.1 on 2023-06-15 17:56 + +from django.db import migrations, models +import pgtrigger.compiler +import pgtrigger.migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0018_update_cluster_model"), + ] + + operations = [ + migrations.AlterModelOptions( + name="opinion", + options={"ordering": ("order",)}, + ), + pgtrigger.migrations.RemoveTrigger( + model_name="opinion", + name="update_or_delete_snapshot_delete", + ), + pgtrigger.migrations.RemoveTrigger( + model_name="opinion", + name="update_or_delete_snapshot_update", + ), + migrations.AddField( + model_name="opinion", + name="order", + field=models.PositiveIntegerField( + db_index=True, default=1, editable=False, verbose_name="order" + ), + preserve_default=False, + ), + migrations.AddField( + model_name="opinionevent", + name="order", + field=models.PositiveIntegerField( + default=1, editable=False, verbose_name="order" + ), + preserve_default=False, + ), + pgtrigger.migrations.AddTrigger( + model_name="opinion", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_update", + sql=pgtrigger.compiler.UpsertTriggerSql( + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr"))', + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", 
"html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="bcac41027f469bbd394e8671cb0b2fa33e7035f3", + operation="UPDATE", + pgid="pgtrigger_update_or_delete_snapshot_update_67ecd", + table="search_opinion", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="opinion", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_delete", + sql=pgtrigger.compiler.UpsertTriggerSql( + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad", + operation="DELETE", + pgid="pgtrigger_update_or_delete_snapshot_delete_1f4fd", + table="search_opinion", + when="AFTER", + ), + ), + ), + ] diff --git a/cl/search/migrations/0019_order_opinions.sql b/cl/search/migrations/0019_order_opinions.sql new file mode 100644 index 0000000000..3226cb510b --- /dev/null +++ b/cl/search/migrations/0019_order_opinions.sql @@ -0,0 +1,129 @@ +BEGIN; +-- +-- Change Meta options on opinion +-- +-- (no-op) +-- +-- Remove trigger update_or_delete_snapshot_delete from model opinion +-- +DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; +-- +-- Remove trigger update_or_delete_snapshot_update from model opinion +-- +DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; +-- +-- Add field order to opinion +-- +ALTER TABLE "search_opinion" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); +ALTER TABLE "search_opinion" ALTER COLUMN "order" DROP DEFAULT; +-- +-- Add field order to opinionevent +-- +ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); +ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; +-- +-- Create trigger update_or_delete_snapshot_update on model opinion +-- + + CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( + trigger_name NAME + ) + RETURNS BOOLEAN AS $$ + DECLARE + _pgtrigger_ignore TEXT[]; + _result BOOLEAN; + BEGIN + BEGIN + SELECT INTO 
_pgtrigger_ignore + CURRENT_SETTING('pgtrigger.ignore'); + EXCEPTION WHEN OTHERS THEN + END; + IF _pgtrigger_ignore IS NOT NULL THEN + SELECT trigger_name = ANY(_pgtrigger_ignore) + INTO _result; + RETURN _result; + ELSE + RETURN FALSE; + END IF; + END; + $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_update_67ecd() + RETURNS TRIGGER AS $$ + + BEGIN + IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN + IF (TG_OP = 'DELETE') THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; + END IF; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + END; + $$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; + CREATE TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd + AFTER UPDATE ON "search_opinion" + + + FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr")) + EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_update_67ecd(); + + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS 'bcac41027f469bbd394e8671cb0b2fa33e7035f3'; + +-- +-- Create trigger update_or_delete_snapshot_delete on model opinion +-- + + CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( + trigger_name NAME + ) + RETURNS BOOLEAN AS $$ + DECLARE + _pgtrigger_ignore TEXT[]; + _result BOOLEAN; + BEGIN + BEGIN + SELECT INTO _pgtrigger_ignore + CURRENT_SETTING('pgtrigger.ignore'); + EXCEPTION WHEN OTHERS THEN + END; + IF _pgtrigger_ignore IS NOT NULL THEN + SELECT trigger_name = ANY(_pgtrigger_ignore) + INTO _result; + 
RETURN _result; + ELSE + RETURN FALSE; + END IF; + END; + $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_delete_1f4fd() + RETURNS TRIGGER AS $$ + + BEGIN + IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN + IF (TG_OP = 'DELETE') THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; + END IF; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + END; + $$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; + CREATE TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd + AFTER DELETE ON "search_opinion" + + + FOR EACH ROW + EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_delete_1f4fd(); + + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad'; + +CREATE INDEX "search_opinion_order_d54dd126" ON "search_opinion" ("order"); +COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index d04587edef..fc6aa75414 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,6 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations +from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,7 +2816,7 @@ def sort_cites(c): @pghistory.track(AfterUpdateOrDeleteSnapshot()) -class Opinion(AbstractDateTimeModel): +class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" UNANIMOUS = "015unamimous" LEAD = "020lead" @@ -2965,6 +2966,7 @@ class Opinion(AbstractDateTimeModel): default=False, db_index=True, ) + order_with_respect_to = "cluster" @property def siblings(self) -> QuerySet: diff --git a/cl/settings/django.py b/cl/settings/django.py index 21b1ba4a7c..a522d824df 100644 --- a/cl/settings/django.py +++ b/cl/settings/django.py @@ -162,6 +162,7 @@ "admin_cursor_paginator", "pghistory", "pgtrigger", + "ordered_model", # CourtListener Apps "cl.alerts", "cl.audio", diff --git a/poetry.lock b/poetry.lock index 7f91780e6a..b8f82b7eed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1054,6 +1054,17 @@ files = [ {file = "django_mathfilters-1.0.0-py3-none-any.whl", hash = "sha256:64200a21bb249fbf27be601d4bbb788779e09c6e063170c097cd82c4d18ebb83"}, ] +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -4576,4 +4587,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "674af32861e1e5bf9c31401f02a3af0b698be8b60b9492cd89ab5464218efd3e" +content-hash = "2b4d76ce134a241162a25c9634a4f9fdbf140d261750fdfca63a87ccbac4fcfd" diff --git a/pyproject.toml b/pyproject.toml index 91020cf1e0..ee00cd8366 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ types-dateparser = "^1.1.4.6" juriscraper = "^2.5.49" uvicorn = {extras = ["standard"], version = "^0.22.0"} daphne = "^4.0.0" +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 6cf0d7581be8241eda3d0b8b4a46833efb7de979 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 14:57:38 -0400 Subject: [PATCH 008/372] fix(tests): Update fixtures for opinion model --- cl/search/fixtures/functest_opinions.json | 12 +++-- cl/search/fixtures/opinions-issue-412.json | 6 ++- cl/search/fixtures/opinions-issue-550.json | 6 ++- cl/search/fixtures/test_objects_search.json | 18 ++++--- .../fixtures/api_scotus_map_data.json | 6 ++- .../fixtures/scotus_map_data.json | 51 ++++++++++++------- 6 files changed, 66 insertions(+), 33 deletions(-) diff --git a/cl/search/fixtures/functest_opinions.json b/cl/search/fixtures/functest_opinions.json index e4fa89a260..45f5f0b759 100644 --- a/cl/search/fixtures/functest_opinions.json +++ b/cl/search/fixtures/functest_opinions.json @@ -64,7 +64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -134,7 +135,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 11 @@ -184,7 +186,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 12 @@ -254,7 +257,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 12 diff --git a/cl/search/fixtures/opinions-issue-412.json b/cl/search/fixtures/opinions-issue-412.json index ca6ac33971..2e429ebecf 100644 --- a/cl/search/fixtures/opinions-issue-412.json +++ b/cl/search/fixtures/opinions-issue-412.json @@ -64,7 +64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -134,7 +135,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/opinions-issue-550.json b/cl/search/fixtures/opinions-issue-550.json index b0163eb8f8..829a94c7d2 100644 --- a/cl/search/fixtures/opinions-issue-550.json +++ b/cl/search/fixtures/opinions-issue-550.json @@ -64,7 
+64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -86,7 +87,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index 2255c7edcf..9fddb84fca 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -239,7 +239,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 1 @@ -261,7 +262,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2 @@ -283,7 +285,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 3 @@ -305,7 +308,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 4 @@ -327,7 +331,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 5 @@ -349,7 +354,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 6 diff --git a/cl/visualizations/fixtures/api_scotus_map_data.json b/cl/visualizations/fixtures/api_scotus_map_data.json index 5b4b19fe73..46dc2f9856 100644 --- a/cl/visualizations/fixtures/api_scotus_map_data.json +++ b/cl/visualizations/fixtures/api_scotus_map_data.json @@ -121,7 +121,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 1 @@ -143,7 +144,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2 diff --git a/cl/visualizations/fixtures/scotus_map_data.json b/cl/visualizations/fixtures/scotus_map_data.json index ce504fe2c9..a885e4df54 100644 --- a/cl/visualizations/fixtures/scotus_map_data.json +++ b/cl/visualizations/fixtures/scotus_map_data.json @@ -902,7 +902,8 @@ "date_created": "2016-02-16T19:49:54.525Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111014 @@ -924,7 +925,8 @@ "date_created": "2016-02-16T19:49:54.545Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111113 @@ -946,7 +948,8 @@ "date_created": "2016-02-16T19:49:54.565Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111464 @@ -968,7 +971,8 @@ "date_created": "2016-02-16T19:49:54.610Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + 
"order": 1 }, "model": "search.opinion", "pk": 111505 @@ -990,7 +994,8 @@ "date_created": "2016-02-16T19:49:54.629Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111924 @@ -1012,7 +1017,8 @@ "date_created": "2016-02-16T19:49:54.575Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112331 @@ -1034,7 +1040,8 @@ "date_created": "2016-02-16T19:49:54.537Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112646 @@ -1056,7 +1063,8 @@ "date_created": "2016-02-16T19:49:54.583Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112779 @@ -1078,7 +1086,8 @@ "date_created": "2016-02-16T19:49:54.592Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112874 @@ -1100,7 +1109,8 @@ "date_created": "2016-02-16T19:49:54.602Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 117967 @@ -1122,7 +1132,8 @@ "date_created": "2016-02-16T19:49:54.553Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 118377 @@ -1144,7 +1155,8 @@ "date_created": "2016-02-16T19:49:54.621Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 121168 @@ -1166,7 +1178,8 @@ "date_created": "2016-02-16T19:49:54.658Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 136984 @@ -1188,7 +1201,8 @@ "date_created": "2016-02-16T19:49:54.647Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 142900 @@ -1210,7 +1224,8 @@ "date_created": "2016-02-16T19:49:54.666Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 799990 @@ -1232,7 +1247,8 @@ "date_created": "2016-02-16T19:49:54.636Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 799993 @@ -1254,7 +1270,8 @@ "date_created": "2016-02-16T19:49:54.513Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2674862 From 05e9d9856b543579c28de371dcf8823c8ee7e666 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 15:23:23 -0400 Subject: [PATCH 009/372] fix(tests): Update fixtures for opinion model Take 2 --- .../fixtures/test_objects_query_counts.json | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cl/search/fixtures/test_objects_query_counts.json b/cl/search/fixtures/test_objects_query_counts.json index aa909b2fb2..b51117602a 100644 --- a/cl/search/fixtures/test_objects_query_counts.json +++ b/cl/search/fixtures/test_objects_query_counts.json @@ -300,7 +300,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"020lead" + "type":"020lead", + "order": 1 }, 
"model":"search.opinion", "pk":1 @@ -324,7 +325,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":2 @@ -348,7 +350,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":3 @@ -371,7 +374,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":4 @@ -395,7 +399,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":5 @@ -418,7 +423,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":6 From b0fc70a56055699c551b59a3ed38a005459905e3 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 16 Jun 2023 12:46:45 -0400 Subject: [PATCH 010/372] feat(models): Override django-ordered-model default By default it sorts by order - so if we dont want that feature we simply need to override the django order with a custom ordered manager in on the opinion class. (I think) --- cl/search/models.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cl/search/models.py b/cl/search/models.py index fc6aa75414..7fc2c03458 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,7 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations -from ordered_model.models import OrderedModel +from ordered_model.models import OrderedModel, OrderedModelManager from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,6 +2815,13 @@ def sort_cites(c): return 8 +class CustomOrderedManager(OrderedModelManager): + """Override the django ordered model default ordering""" + + def get_queryset(self): + return super().get_queryset().order_by() + + @pghistory.track(AfterUpdateOrDeleteSnapshot()) class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" @@ -2968,6 +2975,8 @@ class Opinion(OrderedModel, AbstractDateTimeModel): ) order_with_respect_to = "cluster" + objects = CustomOrderedManager() + @property def siblings(self) -> QuerySet: # These are other sub-opinions of the current cluster. 
From b8fa44563ac4bb42d6ad3020c604da8f8940f187 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 16 Jun 2023 13:44:51 -0400 Subject: [PATCH 011/372] fix(models): Different override for ordering on OP --- cl/search/models.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 7fc2c03458..be645bc5e8 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,7 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations -from ordered_model.models import OrderedModel, OrderedModelManager +from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,13 +2815,6 @@ def sort_cites(c): return 8 -class CustomOrderedManager(OrderedModelManager): - """Override the django ordered model default ordering""" - - def get_queryset(self): - return super().get_queryset().order_by() - - @pghistory.track(AfterUpdateOrDeleteSnapshot()) class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" @@ -2975,7 +2968,8 @@ class Opinion(OrderedModel, AbstractDateTimeModel): ) order_with_respect_to = "cluster" - objects = CustomOrderedManager() + class Meta: + ordering = () @property def siblings(self) -> QuerySet: From 7429eba0290bc2f931489b5799e90de318cd1512 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 28 Jul 2023 19:30:18 -0600 Subject: [PATCH 012/372] fix(poetry): Fix merge conflicts --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index e034727158..24dc7977e0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1072,6 +1072,17 @@ files = [ {file = "django_mathfilters-1.0.0-py3-none-any.whl", hash = "sha256:64200a21bb249fbf27be601d4bbb788779e09c6e063170c097cd82c4d18ebb83"}, ] +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -4690,4 +4701,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "10446165560282337aada87c0f3a9324dc904777bbfcc0f7e35db5c9d13a10a9" +content-hash = "7c0448e0852dba4f13177892cc0e619e2b58470f4d82707d8069fbeceb1cb919" diff --git a/pyproject.toml b/pyproject.toml index f9d568defa..2caee093ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ daphne = "^4.0.0" psycopg2 = "^2.9.6" juriscraper = "^2.5.51" httpx = {extras = ["http2"], version = "^0.24.1"} +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From f45a093c6b02ed5ae4a1077062295fa25f1c4894 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 31 Jul 2023 19:43:51 -0600 Subject: [PATCH 013/372] fix(models): Add 'order' field as default ordering for Opinion model Test added for django-ordered-model library Optimize imports in search/tests.py --- cl/search/models.py | 2 +- cl/search/tests.py | 69 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 5024bdcc3d..e50987c3f6 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -2986,7 +2986,7 @@ class Opinion(OrderedModel, AbstractDateTimeModel): order_with_respect_to = "cluster" class Meta: - ordering = () + ordering = ("order",) @property def siblings(self) -> QuerySet: diff --git a/cl/search/tests.py b/cl/search/tests.py index b5c71c9b16..c4edb24b31 100644 --- a/cl/search/tests.py +++ b/cl/search/tests.py @@ -5,7 +5,7 @@ from datetime import date from functools import reduce from pathlib import Path -from unittest import mock, skipUnless +from unittest import mock import pytz from asgiref.sync import sync_to_async @@ -19,9 +19,8 @@ from django.db import IntegrityError, transaction from django.http import HttpRequest from django.test import AsyncRequestFactory, override_settings -from django.test.utils import captured_stderr from django.urls import reverse -from elasticsearch_dsl import Q, connections +from elasticsearch_dsl import Q from factory import RelatedFactory from lxml import etree, html from rest_framework.status import HTTP_200_OK @@ -58,6 +57,7 @@ DocketFactory, OpinionClusterFactory, OpinionClusterFactoryWithChildrenAndParents, + OpinionFactory, OpinionsCitedWithParentsFactory, OpinionWithChildrenFactory, OpinionWithParentsFactory, @@ -283,6 +283,69 @@ def test_custom_manager_chained_filter(self) -> None: ) self.assertEqual(cluster_count, expected_count) + def test_opinions_order(self) -> None: + """Test django-ordered-model library""" + + # Create court + court = CourtFactory(id="nyappdiv") + + # Create cluster + cluster = OpinionClusterFactory( + case_name="Foo v. Bar", + case_name_short="Foo v. 
Bar", + docket=DocketFactory( + court=court, + ), + date_filed=date(1978, 3, 10), + source="U", + precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, + ) + + # Create three opinions + op_1 = OpinionFactory( + cluster=cluster, + type="Concurrence Opinion", + ) + + op_2 = OpinionFactory( + cluster=cluster, + type="Dissent", + ) + + op_3 = OpinionFactory( + cluster=cluster, + type="Lead Opinion", + ) + + # Test that the value of the order field matches the order in which + # they were created + self.assertEqual(op_1.order, 0) + self.assertEqual(op_2.order, 1) + self.assertEqual(op_3.order, 2) + + # Use library method to move lead opinion to first position, we can + # use this function to easily reorder existing opinions + op_3.to(0) + + # The position of the elements was modified, we refresh the objects + op_1.refresh_from_db() + op_2.refresh_from_db() + op_3.refresh_from_db() + + # Test new order + self.assertEqual(op_3.order, 0) + self.assertEqual(op_1.order, 1) + self.assertEqual(op_2.order, 2) + + # Add new opinion to cluster + op_4 = OpinionFactory( + cluster=cluster, + type="Dissent", + ) + + # Test that the new opinion is in last place + self.assertEqual(op_4.order, 3) + class DocketValidationTest(TestCase): @classmethod From 37dee19fcfacf95a79aac71c21ccc507d10289b4 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 11:27:50 -0600 Subject: [PATCH 014/372] fix(opinion_order): fix merge conflicts with main --- poetry.lock | 17 ++++++++++++++--- pyproject.toml | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index a23818c13e..9b7321deb1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1094,6 +1094,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2718,7 +2729,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.0 || >=3.4.0" +python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5091,4 +5102,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "5257a6d2a26b74054bac82d0c5700a55f1e2e2ec580608921e8a27a76d015f52" +content-hash = "46adbdc75bf4ad70aa4d6531f4d71a8f22f1e85ee9886408e921e7147aab7a36" diff --git a/pyproject.toml b/pyproject.toml index 87d6e90ff9..ef5970143f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,7 @@ juriscraper = "^2.5.51" httpx = {extras = ["http2"], version = "^0.24.1"} django-model-utils = "^4.3.1" inflection = "^0.5.1" # necessary for DRF schema generation - remove after drf-spectacular +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 3b4cb06ef8724d5052f9868f8d77388acfe18be1 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 11:55:03 -0600 Subject: [PATCH 015/372] fix(opinion_order): rename migrations --- .../{0019_order_opinions.py => 0020_order_opinions.py} | 2 +- .../{0019_order_opinions.sql => 0020_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0019_order_opinions.py => 0020_order_opinions.py} (99%) rename cl/search/migrations/{0019_order_opinions.sql => 0020_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0019_order_opinions.py b/cl/search/migrations/0020_order_opinions.py similarity index 99% rename from cl/search/migrations/0019_order_opinions.py rename to cl/search/migrations/0020_order_opinions.py index 5e446056cc..f614156360 100644 --- a/cl/search/migrations/0019_order_opinions.py +++ b/cl/search/migrations/0020_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0018_update_cluster_model"), + ("search", "0019_add_docket_source_noop"), ] operations = [ diff --git a/cl/search/migrations/0019_order_opinions.sql b/cl/search/migrations/0020_order_opinions.sql similarity index 100% rename from cl/search/migrations/0019_order_opinions.sql rename to cl/search/migrations/0020_order_opinions.sql From 878b9479e9c95b429b16c6bd044a2315b6cce3f3 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 17:11:30 -0600 Subject: [PATCH 016/372] feat(opinion_order): management command to update the order of harvard and columbia opinions --- .../commands/update_opinions_order.py | 598 ++++++++++++++++++ 1 file changed, 598 insertions(+) create mode 100644 cl/corpus_importer/management/commands/update_opinions_order.py diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py 
b/cl/corpus_importer/management/commands/update_opinions_order.py new file mode 100644 index 0000000000..f48de154a0 --- /dev/null +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -0,0 +1,598 @@ +import re +from typing import Any, Optional + +from bs4 import BeautifulSoup, NavigableString, Tag +from django.core.management import BaseCommand +from django.db.models import Count + +from cl.corpus_importer.utils import similarity_scores +from cl.lib.command_utils import logger +from cl.lib.string_diff import get_cosine_similarity +from cl.search.models import Opinion, OpinionCluster + +# TODO Should we add a flag to know that the cluster has been processed? + + +def match_text_lists( + file_opinions_list: list[str], cl_opinions_list: list[str] +) -> dict[int, Any]: + """Generate matching lists above threshold + :param file_opinions_list: Opinions from file + :param cl_opinions_list: CL opinions + :return: Matches if found or False + """ + # We import this here to avoid a circular import + from cl.corpus_importer.management.commands.harvard_opinions import ( + compare_documents, + ) + + scores = similarity_scores(file_opinions_list, cl_opinions_list) + + matches = {} + for i, row in enumerate(scores): + j = row.argmax() # type: ignore + # Lower threshold for small opinions. + if ( + get_cosine_similarity(file_opinions_list[i], cl_opinions_list[j]) + < 0.60 + ): + continue + percent_match = compare_documents( + file_opinions_list[i], cl_opinions_list[j] + ) + if percent_match < 60: + continue + matches[i] = j + + # Key is opinion position from file, Value is opinion position from cl opinion + # e.g. matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file + # opinion and 2 is cl opinion + return matches + + +def get_opinion_content( + cluster_id, +) -> tuple[Optional[str], list[dict], int, bool]: + """Get the opinions content for a cluster object + :param cluster_id: Cluster ID for a set of opinions + :return: (xml path, list of extracted opinions, start position, True if combined + opinions exists in cluster) + """ + cl_cleaned_opinions = [] + # by default the opinions are ordered by pk + opinions_from_cluster = Opinion.objects.filter( + cluster_id=cluster_id + ).order_by("id") + combined_opinions_cluster = opinions_from_cluster.filter( + type="010combined" + ) + xml_path = None + combined_opinion = False + if combined_opinions_cluster: + # the combined opinion will be displayed at beginning + start_position = combined_opinions_cluster.count() + combined_opinion = True + else: + # we don't have combined opinions, we start ordering from 0 to n + start_position = 0 + + for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): + if op.local_path and not xml_path: + xml_path = op.local_path + content = None + if len(op.html_with_citations) > 1: + content = op.html_with_citations + elif len(op.html_columbia) > 1: + content = op.html_columbia + elif len(op.html_lawbox) > 1: + content = op.html_lawbox + elif len(op.plain_text) > 1: + content = op.plain_text + elif len(op.html) > 1: + content = op.html + elif len(op.xml_harvard) > 1: + content = op.xml_harvard + if content: + soup = BeautifulSoup(content, features="html.parser") + prep_text = re.sub( + r"[^a-zA-Z0-9 ]", "", soup.getText(separator=" ").lower() + ) + prep_text = re.sub(" +", " ", prep_text) + cl_cleaned_opinions.append( + { + "id": op.id, + "byline": op.author_str, + "type": op.type, + "opinion": prep_text, + "order": i, + } + ) + + return xml_path, cl_cleaned_opinions, start_position, 
combined_opinion + + +def get_opinions_columbia_xml(xml_filepath: str) -> list: + """Convert xml data into dict + :param xml_filepath: path of xml file + :return: dict with data + """ + + SIMPLE_TAGS = [ + "attorneys", + "caption", + "citation", + "court", + "date", + "docket", + "hearing_date", + "panel", + "posture", + "reporter_caption", + ] + + data = {} # type: dict + + with open(xml_filepath, "r", encoding="utf-8") as f: + file_content = f.read() + + data["unpublished"] = False + + if "<opinion unpublished=true>" in file_content: + file_content = file_content.replace( + "<opinion unpublished=true>", "<opinion>" + ) + file_content = file_content.replace("<unpublished>", "").replace( + "</unpublished>", "" + ) + + data["unpublished"] = True + + # Sometimes opening and ending tag mismatch (e.g. c6b39dcb29c9c.xml) + file_content = file_content.replace( + "</footnote_body></block_quote>", "</block_quote></footnote_body>" + ) + + soup = BeautifulSoup(file_content, "lxml") + + # Find the outer <opinion> tag to have all elements inside + find_opinion = soup.find("opinion") + + step_one_opinions = [] # type: list + opinions = [] # type: list + order = 0 + + if find_opinion: + untagged_content = [] + + # We iterate all content, with and without tags + # STEP 1: Extract all content in multiple dict elements + for i, content in enumerate(find_opinion): # type: int, Tag + if type(content) == NavigableString: + # We found a raw string, store it + untagged_content.append(str(content)) + + else: + if content.name in SIMPLE_TAGS + [ + "citation_line", + "opinion_byline", + "dissent_byline", + "concurrence_byline", + ]: + # Ignore these tags, it will be processed later + continue + elif content.name in [ + "opinion_text", + "dissent_text", + "concurrence_text", + ]: + if untagged_content: + # We found something other than a navigable string that is + # not an opinion, but now we have found an opinion, + # let's create this content first + + # default type + op_type = "opinion" + if step_one_opinions: + if step_one_opinions[-1].get("type"): + # use type of previous opinion if exists + op_type = step_one_opinions[-1].get("type") + + # Get rid of double spaces + opinion_content = re.sub( + " +", " ", "\n".join(untagged_content) + ).strip() # type: str + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": "", + "type": op_type, + } + ) + order = order + 1 + untagged_content = [] + + byline = content.find_previous_sibling() + opinion_author = "" + if byline and "_byline" in byline.name: + opinion_author = byline.get_text() + + opinion_content = re.sub( + " +", " ", content.decode_contents() + ).strip() + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": opinion_author, + "type": content.name.replace("_text", ""), + } + ) + order = order + 1 + + else: + # Content not inside _text tag, we store it + untagged_content.append(str(content)) + + if untagged_content: + # default type + op_type = "opinion" + if step_one_opinions: + if step_one_opinions[-1].get("type"): + # use type of previous opinion if exists + op_type = step_one_opinions[-1].get("type") + + opinion_content = re.sub( + " +", " ", "\n".join(untagged_content) + ).strip() + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": "", + "type": op_type, + } + ) + + # Step 2: Merge found content in the xml file + new_order = 0 + authorless_content = [] + + for i, found_content in enumerate(step_one_opinions, start=1): + byline = found_content.get("byline") + if not byline: + # Opinion has no byline, store it
authorless_content.append(found_content) + + if byline: + # Opinion has byline + opinion_type = found_content.get("type") + opinion_content = found_content.get("opinion", "") + # Store content that doesn't match the current type + alternative_authorless_content = [ + z + for z in authorless_content + if z.get("type") != opinion_type + ] + # Keep content that matches the current type + authorless_content = [ + z + for z in authorless_content + if z.get("type") == opinion_type + ] + + if alternative_authorless_content: + # Keep floating text that are not from the same type, + # we need to create a separate opinion for those, + # for example: in 2713f39c5a8e8684.xml we have an opinion + # without an author, and the next opinion with an author is + # a dissent opinion, we can't combine both + + # We check if the previous stored opinion matches the type of the + # content + relevant_opinions = ( + [opinions[-1]] + if opinions + and opinions[-1]["type"] + == alternative_authorless_content[0].get("type") + else [] + ) + + if relevant_opinions: + previous_opinion = relevant_opinions[-1] + if previous_opinion.get( + "type" + ) == alternative_authorless_content[0].get("type"): + # Merge last opinion with previous opinion, it probably + # belongs the same author + relevant_opinions[-1][ + "opinion" + ] += "\n" + "\n".join( + [ + f.get("opinion") + for f in alternative_authorless_content + if f.get("opinion") + ] + ) + authorless_content = [] + + else: + # No relevant opinions found, create a new opinion + new_opinion = { + "byline": None, + "type": alternative_authorless_content[0].get( + "type" + ), + "opinion": "\n".join( + [ + f.get("opinion") + for f in alternative_authorless_content + if f.get("opinion") + ] + ), + "order": new_order, + } + new_order = new_order + 1 + opinions.append(new_opinion) + + # Add new opinion + new_opinion = { + "byline": byline, + "type": opinion_type, + "opinion": "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("type") == opinion_type + ] + ) + + "\n\n" + + opinion_content, + "order": new_order, + } + + opinions.append(new_opinion) + new_order = new_order + 1 + authorless_content = [] + + if len(step_one_opinions) == i and authorless_content: + # If is the last opinion, and we still have opinions without + # byline, create an opinion without an author and the contents + # that couldn't be merged + + # We check if the previous stored opinion matches the type of the + # content + relevant_opinions = ( + [opinions[-1]] + if opinions + and opinions[-1]["type"] + == authorless_content[0].get("type") + else [] + ) + + if relevant_opinions: + previous_opinion = relevant_opinions[-1] + if previous_opinion.get("type") == authorless_content[ + 0 + ].get("type"): + # Merge last opinion with previous opinion, it probably + # belongs the same author + relevant_opinions[-1]["opinion"] += "\n" + "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("opinion") + ] + ) + + else: + # Create last floating opinion + new_opinion = { + "byline": None, + "type": authorless_content[0].get("type"), + "opinion": "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("opinion") + ] + ), + "order": new_order, + } + opinions.append(new_opinion) + + for op in opinions: + opinion_content = op.get("opinion") + opinion_content = BeautifulSoup( + opinion_content, "html.parser" + ).getText() + opinion_content = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_content.lower()) + op["opinion"] = opinion_content + + return opinions + + +def 
run_harvard(): + """ + We assume that harvard data is already ordered, we just need to fill the order + field in each opinion + """ + + # Get all harvard clusters with more than one opinion + clusters = ( + OpinionCluster.objects.prefetch_related("sub_opinions") + .annotate(opinions_count=Count("sub_opinions")) + .filter(opinions_count__gt=1, source="U") + ) + # print(clusters.query) + print("clusters", len(clusters)) + + # cluster_id: 4697264, the combined opinion will go to the last position + for oc in clusters: + combined_opinions_cluster = oc.sub_opinions.filter( + type="010combined" + ).order_by("id") + if combined_opinions_cluster: + # the combined opinion will be displayed at first + start_position = combined_opinions_cluster.count() + else: + # we don't have combined opinions, we start ordering from 0 to n + start_position = 0 + + print("combined_opinions_cluster", combined_opinions_cluster) + for opinion_order, cluster_op in enumerate( + oc.sub_opinions.exclude(type="010combined").order_by("id"), + start=start_position, + ): + cluster_op.order = opinion_order + cluster_op.save() + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate(combined_opinions_cluster): + cluster_op.order = opinion_order + cluster_op.save() + + logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") + + +def run_columbia(): + """ + Update opinion order for columbia clusters + """ + + # Get all columbia cluster ids with more than one opinion + clusters = ( + OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) + .filter(opinions_count__gt=1, source="Z") + .order_by("id") + .values_list("id") + ) + + for cluster_id in clusters: + logger.info(f"Processing cluster id: {cluster_id}") + ( + xml_path, + cl_cleaned_opinions, + start_position, + combined_opinion, + ) = get_opinion_content(cluster_id) + + columbia_opinions = None + if xml_path: + columbia_opinions = get_opinions_columbia_xml(xml_path) + + if cl_cleaned_opinions and columbia_opinions: + matches = match_text_lists( + [op.get("opinion") for op in columbia_opinions], + [op.get("opinion") for op in cl_cleaned_opinions], + ) + + if matches: + if len(matches.values()) != len(set(matches.values())): + # We don't have a unique match for each opinion, they were + # probably combined incorrectly + logger.info( + f"We can't infer opinions order for cluster id: {cluster_id}" + ) + # Go to next cluster id + continue + + if len(cl_cleaned_opinions) > len(set(matches.values())): + # We have more opinions than matches + logger.info( + f"We couldn't match all cl opinions to the file's " + f"content, cluster id: {cluster_id}" + ) + # Go to next cluster id + continue + + failed = False + for file_pos, cl_pos in matches.items(): + # file_pos is the correct index to find the opinion id to update + file_opinion = columbia_opinions[file_pos] + # the order was calculated using the xml file + file_order = file_opinion.get("order") + start_position + cl_opinion = cl_cleaned_opinions[cl_pos] + opinion_id_to_update = cl_opinion.get("id") + + if opinion_id_to_update: + try: + # Save opinion + op = Opinion.objects.get(id=opinion_id_to_update) + op.order = file_order + op.save() + logger.info( + f"Cluster id processed: {cluster_id} Update opinion id: {opinion_id_to_update} with position: {file_order}" + ) + except Opinion.DoesNotExist: + logger.warning( + f"We can't update opinion, opinion doesn't exist with " + f"id: {opinion_id_to_update}" + ) + failed = True + break + else: + logger.warning( + f"We can't update opinion, 
empty opinion id " + f"from cluster: {cluster_id}" + ) + failed = True + break + + if combined_opinion and not failed: + combined_opinions_cluster = Opinion.objects.filter( + cluster_id=cluster_id, type="010combined" + ).order_by("id") + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate( + combined_opinions_cluster + ): + cluster_op.order = opinion_order + cluster_op.save() + + else: + # No matches found + logger.warning( + f"Failed to match opinions from cluster id: {cluster_id}" + ) + continue + + +class Command(BaseCommand): + help = "Fill order field in Opinion objects" + + def __init__(self, *args, **kwargs): + super(Command, self).__init__(*args, **kwargs) + + def add_arguments(self, parser): + parser.add_argument( + "--process-harvard", + action="store_true", + help="Fix harvard opinions order", + ) + + parser.add_argument( + "--process-columbia", + action="store_true", + help="Fix columbia opinions order", + ) + + def handle(self, *args, **options): + print("harvard", options["process_harvard"]) + print("columbia", options["process_columbia"]) + + if options["process_harvard"] and options["process_columbia"]: + print( + "You can only select one option process-harvard or process-columbia" + ) + return + + if options["process_harvard"]: + run_harvard() + + if options["process_columbia"]: + run_columbia() From c3a5c4a2a0ad002b075ea69b3a0757bbef684a1f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:13:02 -0600 Subject: [PATCH 017/372] feat(opinion_order): exception when xml file not found --- .../commands/update_opinions_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index f48de154a0..0560c506ba 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -4,6 +4,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand from django.db.models import Count +from django.db.models.fields.files import FieldFile from cl.corpus_importer.utils import similarity_scores from cl.lib.command_utils import logger @@ -52,7 +53,7 @@ def match_text_lists( def get_opinion_content( cluster_id, -) -> tuple[Optional[str], list[dict], int, bool]: +) -> tuple[Optional[FieldFile], list[dict], int, bool]: """Get the opinions content for a cluster object :param cluster_id: Cluster ID for a set of opinions :return: (xml path, list of extracted opinions, start position, True if combined @@ -78,6 +79,8 @@ def get_opinion_content( for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): if op.local_path and not xml_path: + # We store the field because we are using S3 for storage and that backend + # doesn't support absolute paths xml_path = op.local_path content = None if len(op.html_with_citations) > 1: @@ -111,7 +114,7 @@ def get_opinion_content( return xml_path, cl_cleaned_opinions, start_position, combined_opinion -def get_opinions_columbia_xml(xml_filepath: str) -> list: +def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: """Convert xml data into dict :param xml_filepath: path of xml file :return: dict with data @@ -132,8 +135,8 @@ def get_opinions_columbia_xml(xml_filepath: str) -> list: data = {} # type: dict - with open(xml_filepath, "r", encoding="utf-8") as f: - file_content = f.read() + with 
xml_filepath.open("r") as f: + file_content = f.read().decode("utf-8") data["unpublished"] = False @@ -432,6 +435,7 @@ def run_harvard(): # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: + logger.info(f"Processing cluster id: {oc}") combined_opinions_cluster = oc.sub_opinions.filter( type="010combined" ).order_by("id") @@ -468,7 +472,7 @@ def run_columbia(): OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1, source="Z") .order_by("id") - .values_list("id") + .values_list("id", flat=True) ) for cluster_id in clusters: @@ -482,7 +486,11 @@ def run_columbia(): columbia_opinions = None if xml_path: - columbia_opinions = get_opinions_columbia_xml(xml_path) + try: + columbia_opinions = get_opinions_columbia_xml(xml_path) + except FileNotFoundError: + logger.warning(f"Xml file not found, cluster id: {cluster_id}") + continue if cl_cleaned_opinions and columbia_opinions: matches = match_text_lists( From 6ba8d3d3b1048ba4dfaf79ef60b72bf5fff8e55f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:37:30 -0600 Subject: [PATCH 018/372] feat(opinion_order): add param to resume command to order opinions --- .../commands/update_opinions_order.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 0560c506ba..d4d915695d 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -82,6 +82,7 @@ def get_opinion_content( # We store the field because we are using S3 for storage and that backend # doesn't support absolute paths xml_path = op.local_path + # print("url", op.local_path.url) content = None if len(op.html_with_citations) > 1: content = op.html_with_citations @@ -136,7 +137,7 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict with xml_filepath.open("r") as f: - file_content = f.read().decode("utf-8") + file_content = f.read() data["unpublished"] = False @@ -418,10 +419,11 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: return opinions -def run_harvard(): +def run_harvard(start_id: int): """ We assume that harvard data is already ordered, we just need to fill the order field in each opinion + :param start_id: skip any id lower than this value """ # Get all harvard clusters with more than one opinion @@ -429,9 +431,11 @@ def run_harvard(): OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1, source="U") + .order_by("id") ) - # print(clusters.query) - print("clusters", len(clusters)) + + if start_id: + clusters = clusters.filter(pk__gte=start_id) # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: @@ -446,7 +450,6 @@ def run_harvard(): # we don't have combined opinions, we start ordering from 0 to n start_position = 0 - print("combined_opinions_cluster", combined_opinions_cluster) for opinion_order, cluster_op in enumerate( oc.sub_opinions.exclude(type="010combined").order_by("id"), start=start_position, @@ -462,9 +465,10 @@ def run_harvard(): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(): +def run_columbia(start_id: int): """ Update opinion order for columbia clusters + :param start_id: skip any id lower than this value 
""" # Get all columbia cluster ids with more than one opinion @@ -475,6 +479,9 @@ def run_columbia(): .values_list("id", flat=True) ) + if start_id: + clusters = filter(lambda x: x >= start_id, clusters) + for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") ( @@ -589,10 +596,14 @@ def add_arguments(self, parser): help="Fix columbia opinions order", ) - def handle(self, *args, **options): - print("harvard", options["process_harvard"]) - print("columbia", options["process_columbia"]) + parser.add_argument( + "--start-id", + type=int, + default=0, + help="Skip any id lower than this value", + ) + def handle(self, *args, **options): if options["process_harvard"] and options["process_columbia"]: print( "You can only select one option process-harvard or process-columbia" @@ -600,7 +611,7 @@ def handle(self, *args, **options): return if options["process_harvard"]: - run_harvard() + run_harvard(options["start_id"]) if options["process_columbia"]: - run_columbia() + run_columbia(options["start_id"]) From 71ec6241cc0c06d4aaebfb71a0cec188eb39a11a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:47:56 -0600 Subject: [PATCH 019/372] feat(opinion_order): add new param for command --- .../commands/update_opinions_order.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index d4d915695d..7a46530a82 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -11,8 +11,6 @@ from cl.lib.string_diff import get_cosine_similarity from cl.search.models import Opinion, OpinionCluster -# TODO Should we add a flag to know that the cluster has been processed? 
- def match_text_lists( file_opinions_list: list[str], cl_opinions_list: list[str] @@ -419,11 +417,12 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: return opinions -def run_harvard(start_id: int): +def run_harvard(start_id: int, end_id: int): """ We assume that harvard data is already ordered, we just need to fill the order field in each opinion :param start_id: skip any id lower than this value + :param end_id: skip any id greater than this value """ # Get all harvard clusters with more than one opinion @@ -437,6 +436,9 @@ def run_harvard(start_id: int): if start_id: clusters = clusters.filter(pk__gte=start_id) + if end_id: + clusters = clusters.filter(pk__lte=end_id) + # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: logger.info(f"Processing cluster id: {oc}") @@ -465,10 +467,11 @@ def run_harvard(start_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int): +def run_columbia(start_id: int, end_id: int): """ Update opinion order for columbia clusters :param start_id: skip any id lower than this value + :param end_id: skip any id greater than this value """ # Get all columbia cluster ids with more than one opinion @@ -482,6 +485,9 @@ def run_columbia(start_id: int): if start_id: clusters = filter(lambda x: x >= start_id, clusters) + if end_id: + clusters = filter(lambda x: x <= end_id, clusters) + for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") ( @@ -600,7 +606,14 @@ def add_arguments(self, parser): "--start-id", type=int, default=0, - help="Skip any id lower than this value", + help="Start id for a range of clusters (inclusive)", + ) + + parser.add_argument( + "--end-id", + type=int, + default=0, + help="End id for a range of clusters (inclusive)", ) def handle(self, *args, **options): @@ -611,7 +624,7 @@ def handle(self, *args, **options): return if options["process_harvard"]: - run_harvard(options["start_id"]) + run_harvard(options["start_id"], options["end_id"]) if options["process_columbia"]: - run_columbia(options["start_id"]) + run_columbia(options["start_id"], options["end_id"]) From f4615b07d931f93b7a2409438d17f85d6582f4a9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 31 Aug 2023 13:35:04 -0600 Subject: [PATCH 020/372] feat(opinion_order): update typing --- .../commands/update_opinions_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 7a46530a82..480f2ef6d6 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,5 +1,5 @@ import re -from typing import Any, Optional +from typing import Any, List, Optional from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand @@ -13,12 +13,12 @@ def match_text_lists( - file_opinions_list: list[str], cl_opinions_list: list[str] -) -> dict[int, Any]: + file_opinions_list: List[Any], cl_opinions_list: List[Any] +) -> dict[int, int]: """Generate matching lists above threshold :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions - :return: Matches if found or False + :return: Matches if found or empty dict """ # We import this here to avoid a circular import from cl.corpus_importer.management.commands.harvard_opinions import ( @@ -507,8 +507,16 @@ def 
run_columbia(start_id: int, end_id: int): if cl_cleaned_opinions and columbia_opinions: matches = match_text_lists( - [op.get("opinion") for op in columbia_opinions], - [op.get("opinion") for op in cl_cleaned_opinions], + [ + op.get("opinion") + for op in columbia_opinions + if op.get("opinion") + ], + [ + op.get("opinion") + for op in cl_cleaned_opinions + if op.get("opinion") + ], ) if matches: From 3ceff218c23c77201b3b78fd7bda838db09a2706 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 31 Aug 2023 14:30:00 -0600 Subject: [PATCH 021/372] feat(opinion_order): temporary read xml files from s3 it requires to change the AWS_STORAGE_BUCKET_NAME env variable to read files from private storage --- .../management/commands/update_opinions_order.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 480f2ef6d6..0b96a5dae1 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -134,6 +134,16 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict + if "/home/mlissner" in str(xml_filepath): + # Temporary replace the path with the correct from S3, this way we read them + # directly from S3, we need the files in /sources/columbia/opinions/ in + # com-courtlistener-storage bucket + # TODO discuss this + xml_filepath.name = xml_filepath.name.replace( + "/home/mlissner", "/sources" + ) + + # print(f"Opening {xml_filepath.url}") with xml_filepath.open("r") as f: file_content = f.read() @@ -502,7 +512,9 @@ def run_columbia(start_id: int, end_id: int): try: columbia_opinions = get_opinions_columbia_xml(xml_path) except FileNotFoundError: - logger.warning(f"Xml file not found, cluster id: {cluster_id}") + logger.warning( + f"Xml file not found in {xml_path}, cluster id: {cluster_id}" + ) continue if cl_cleaned_opinions and columbia_opinions: From 0bd9b9ac3bf2a511633d93de0bdebc49da06ca5d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 5 Sep 2023 15:52:59 -0600 Subject: [PATCH 022/372] feat(update_opinions_order): argument added to point to the mounted directory with xml files --- .../commands/update_opinions_order.py | 47 +++++++++++++------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 0b96a5dae1..f6c72811d8 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,3 +1,4 @@ +import os.path import re from typing import Any, List, Optional @@ -80,7 +81,6 @@ def get_opinion_content( # We store the field because we are using S3 for storage and that backend # doesn't support absolute paths xml_path = op.local_path - # print("url", op.local_path.url) content = None if len(op.html_with_citations) > 1: content = op.html_with_citations @@ -113,9 +113,10 @@ def get_opinion_content( return xml_path, cl_cleaned_opinions, start_position, combined_opinion -def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: +def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: """Convert xml data into dict :param xml_filepath: path of xml file + :param xml_dir: absolute path to the directory with columbia xml files :return: dict with data """ @@ -134,17 
+135,17 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict - if "/home/mlissner" in str(xml_filepath): - # Temporary replace the path with the correct from S3, this way we read them - # directly from S3, we need the files in /sources/columbia/opinions/ in - # com-courtlistener-storage bucket - # TODO discuss this - xml_filepath.name = xml_filepath.name.replace( - "/home/mlissner", "/sources" + if "/home/mlissner/columbia/opinions/" in str(xml_filepath): + filepath = str( + xml_filepath.name.replace("/home/mlissner/columbia/opinions/", "") ) + # fix file path temporarily + new_xml_filepath = os.path.join(xml_dir, filepath) + else: + logger.info(f"Can't fix xml file path: {xml_filepath}") + raise FileNotFoundError - # print(f"Opening {xml_filepath.url}") - with xml_filepath.open("r") as f: + with open(new_xml_filepath, "r", encoding="utf-8") as f: file_content = f.read() data["unpublished"] = False @@ -477,11 +478,12 @@ def run_harvard(start_id: int, end_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int, end_id: int): +def run_columbia(start_id: int, end_id: int, xml_dir: str): """ Update opinion order for columbia clusters :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value + :param xml_dir: absolute path to the directory with columbia xml files """ # Get all columbia cluster ids with more than one opinion @@ -510,7 +512,9 @@ def run_columbia(start_id: int, end_id: int): columbia_opinions = None if xml_path: try: - columbia_opinions = get_opinions_columbia_xml(xml_path) + columbia_opinions = get_opinions_columbia_xml( + xml_path, xml_dir + ) except FileNotFoundError: logger.warning( f"Xml file not found in {xml_path}, cluster id: {cluster_id}" @@ -622,6 +626,12 @@ def add_arguments(self, parser): help="Fix columbia opinions order", ) + parser.add_argument( + "--xml-dir", + required=False, + help="The absolute path to the directory with columbia xml files", + ) + parser.add_argument( "--start-id", type=int, @@ -646,5 +656,12 @@ def handle(self, *args, **options): if options["process_harvard"]: run_harvard(options["start_id"], options["end_id"]) - if options["process_columbia"]: - run_columbia(options["start_id"], options["end_id"]) + if options["process_columbia"] and options["xml_dir"]: + run_columbia( + options["start_id"], options["end_id"], options["xml_dir"] + ) + + if options["process_columbia"] and not options["xml_dir"]: + print( + "Argument --xml-dir required to read xml files from mounted directory" + ) From 7b16b42d99c1f8b9076d47ddb0ba916df21b564a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 5 Sep 2023 16:09:04 -0600 Subject: [PATCH 023/372] feat(update_opinions_order): fix mypy error --- .../management/commands/update_opinions_order.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index f6c72811d8..05a1bdb7f5 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -136,8 +136,8 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: data = {} # type: dict if "/home/mlissner/columbia/opinions/" in str(xml_filepath): - filepath = str( - xml_filepath.name.replace("/home/mlissner/columbia/opinions/", "") + filepath = str(xml_filepath).replace( + 
"/home/mlissner/columbia/opinions/", "" ) # fix file path temporarily new_xml_filepath = os.path.join(xml_dir, filepath) From d49708adfacecfa075dd3a298a8cdc867532c008 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 21 Sep 2023 13:27:05 -0600 Subject: [PATCH 024/372] fix(opinion_order): Update poetry.lock --- poetry.lock | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 90d12b08bf..a7f14d94cb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1097,6 +1097,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2577,6 +2588,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, 
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2721,7 +2742,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5096,4 +5117,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "96bb211d8a53b99b00d7d118fd7f90f35dcf27b9a940532d8ea814eecc5cbd6b" +content-hash = "6ce30a4f34302d7e0ca29bf1f9794ad2fc1759cef8312bcfebb5550a33cb0019" From 9ae8dc891f764a471729dc8131e0e071bd9f9e7c Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Sep 2023 17:15:04 -0600 Subject: [PATCH 025/372] fix(opinion_order): Update poetry.lock --- poetry.lock | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index c9b8295b88..a7fe6b3511 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1097,6 +1097,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2577,6 +2588,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -5096,4 +5117,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "4b906615444a53e1a26780aa6a3742c0e7844c307c6a991b059ee4de0cb177a8" +content-hash = "6da7f3d3b926ac02caf9720eda2b6c81ae71fe04aafb6a0a35f83e52b4c412cc" From 7702a082063ed2b80b6f803a1a6afa7af6347887 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Sep 2023 18:03:45 -0600 Subject: [PATCH 026/372] fix(opinion_order): Rename migrations --- .../{0020_order_opinions.py => 0022_order_opinions.py} | 2 +- .../{0020_order_opinions.sql => 0022_order_opinions.sql} | 0 2 files changed, 1 
insertion(+), 1 deletion(-) rename cl/search/migrations/{0020_order_opinions.py => 0022_order_opinions.py} (99%) rename cl/search/migrations/{0020_order_opinions.sql => 0022_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0020_order_opinions.py b/cl/search/migrations/0022_order_opinions.py similarity index 99% rename from cl/search/migrations/0020_order_opinions.py rename to cl/search/migrations/0022_order_opinions.py index f614156360..763c98e8fc 100644 --- a/cl/search/migrations/0020_order_opinions.py +++ b/cl/search/migrations/0022_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0019_add_docket_source_noop"), + ("search", "0021_add_pghistory_courthouse"), ] operations = [ diff --git a/cl/search/migrations/0020_order_opinions.sql b/cl/search/migrations/0022_order_opinions.sql similarity index 100% rename from cl/search/migrations/0020_order_opinions.sql rename to cl/search/migrations/0022_order_opinions.sql From 3f173fef6ac191d2c1a0b43f38de3917a9f9b9bf Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 29 Nov 2023 13:08:52 -0600 Subject: [PATCH 027/372] fix(opinions_order): rename migrations update poetry.lock --- ...der_opinions.py => 0024_order_opinions.py} | 2 +- ...r_opinions.sql => 0024_order_opinions.sql} | 0 poetry.lock | 30 ++++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) rename cl/search/migrations/{0022_order_opinions.py => 0024_order_opinions.py} (99%) rename cl/search/migrations/{0022_order_opinions.sql => 0024_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0022_order_opinions.py b/cl/search/migrations/0024_order_opinions.py similarity index 99% rename from cl/search/migrations/0022_order_opinions.py rename to cl/search/migrations/0024_order_opinions.py index 763c98e8fc..1abaed4d76 100644 --- a/cl/search/migrations/0022_order_opinions.py +++ b/cl/search/migrations/0024_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0021_add_pghistory_courthouse"), + ("search", "0023_add_docket_sources_noop"), ] operations = [ diff --git a/cl/search/migrations/0022_order_opinions.sql b/cl/search/migrations/0024_order_opinions.sql similarity index 100% rename from cl/search/migrations/0022_order_opinions.sql rename to cl/search/migrations/0024_order_opinions.sql diff --git a/poetry.lock b/poetry.lock index cdb46a7ef7..30080de3f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,6 +1101,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -1493,18 +1504,6 @@ files = [ {file = "fast_diff_match_patch-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c4cb3aa60664bcafd070915cc0f148c63da3a20babeca29bdf24e6aee80ff481"}, {file = "fast_diff_match_patch-2.0.1-cp310-cp310-win32.whl", hash = "sha256:3423c373c168fcbc56fa488960248ce086dd686402817aa5d4d967537fff1203"}, {file = "fast_diff_match_patch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:f8b5595277f99b4908ae9bab33548bfe7497a99a1f5dc5c277a4f36051dcf993"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a682a72b93e07902b9af3bc591fe365da4024888cceb308f04cdec59eeb3602d"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30e7fb0de87e02db88cda54f6c57a9f7d789e4d0922cfed41f61a1d4415408b"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:58b273cecb941bef392bda622a534de03e6ea8d3186d4d07745375cce9db0833"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e39bb9ca0b7632a15e85cb6b0c4c575010e6fb6e43e5714ee53c7cef1aa4135"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-win32.whl", hash = "sha256:b4d4e6aa5c6a4af0b6c66be593021579f4693c94b848084b89e6783180361db6"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:c1154830dbcb83d1c9ed24f43b1e8226cafc7ce46b6e0971e866bdf513ecc216"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6723cfba7bd9fb712e179acbc9c6cb526076612c0325ad4f1066f3bd176064a"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:378251cc37cd21d14802669a3453f026ed3aa07c07a8aa2daabeefd14a0e0a36"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a2e1ce344438b14400a91b65c79c39345b0ce70a0a8797e88b14485577b5fc0"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cc7285d9a1fbf8990361ce37728202fd6ebee6ddc6cfe6fb15a19905e562f304"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-win32.whl", hash = "sha256:3aaeb207fe586979ecb194ecc2c81ba979d351cd0bdaba8489ce4be0f55206dc"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:4d759ec2d79c638407f32c29dc348fcef6e6a1659927056527b0939a1ab31ca5"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e5205e4f3b820f65138947e0d42959b6910fd959c8e5e8f4fc72472f6fec9d8b"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa1212d0200169e93392805957ca6ae351bfc51282c5119fb231f968c7e12fbc"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d30a9db041dfee960a9c8a35fa99685b1f29530f52f69fef1e3cc02867f0b9"}, @@ -1545,9 +1544,6 @@ files = [ {file = "fast_diff_match_patch-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:58ada748637821445df3cfcb21df412136fb69b8e677ea364aa9ca7a8facb048"}, {file = 
"fast_diff_match_patch-2.0.1-cp39-cp39-win32.whl", hash = "sha256:b07808e98f0bfcd557281126135b24729a30ee10ccc2db4d3358fb2f18ac1879"}, {file = "fast_diff_match_patch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:6f2202d1e9d225918ea3803f66ca9c99d080c8ba5094c438680eb2c8dfd2e48c"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ecff01b3d10d6bed965a1591e37597df118ab0bcc98a3f59a724a0d9bd63fb1"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a92ba0d543524234a17ea2da4892a9752273cfdfed528e581f0f76cbd78cf991"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd5b3b99bb7c14ce8ea5ab184afb2cc6796dac71439b2cfc6fb6227a6846aef3"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:daa821a8dcbc1026f7f8cc177ca599bcfbaaddccdf90bc1ad1e44255b1c239e1"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27de6dc97e7d6dc207585d778ace58e7cc364b8383e5412164224d52ad4099b5"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec27f797b1ecee79c3d76c9a081a6c20fd89068b41ba3b84a6ebe48317c5c46c"}, @@ -2750,7 +2746,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5212,4 +5208,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "f3edde54a6877b5506669d8d8354b28d8b7c6dffbb08c4b0954079680cec63dc" +content-hash = "ce20135f86ae0bc9264359886c298076a90c74d5a30256f7db4541812ffb4f76" From 9dedd433ca589f7db5f4d71edd7318fbd34e3aa8 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 29 Nov 2023 19:14:00 -0600 Subject: [PATCH 028/372] fix(opinions_order): code refactored NOTE: functions found in columbia_utils.py and utils.py, were temporarily added in the command,when the necessary changes are combined we need to remove the functions and import them from the utils. 
--- .../commands/update_opinions_order.py | 833 ++++++++++-------- 1 file changed, 461 insertions(+), 372 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 05a1bdb7f5..ae931ba4b7 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -5,18 +5,356 @@ from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand from django.db.models import Count -from django.db.models.fields.files import FieldFile from cl.corpus_importer.utils import similarity_scores from cl.lib.command_utils import logger from cl.lib.string_diff import get_cosine_similarity -from cl.search.models import Opinion, OpinionCluster +from cl.search.models import SOURCES, Opinion, OpinionCluster + +VALID_COLUMBIA_SOURCES = [ + key + for key in dict(SOURCES.NAMES).keys() + if SOURCES.COLUMBIA_ARCHIVE in key +] + +VALID_HARVARD_SOURCES = [ + key for key in dict(SOURCES.NAMES).keys() if SOURCES.HARVARD_CASELAW in key +] + + +# TODO remove the functions below and import them from utils.py and columbia_utils.py when those changes get merged + + +SIMPLE_TAGS = [ + "attorneys", + "caption", + "citation", + "court", + "date", + "docket", + "hearing_date", + "panel", + "posture", + "reporter_caption", +] + + +class EmptyOpinionException(Exception): + """An exception for opinions that raise a ZeroDivisionError Exception due to an empty + opinion tag or empty opinion content in cl""" + + def __init__(self, message: str) -> None: + self.message = message + + +def read_xml_to_soup(filepath: str) -> BeautifulSoup: + """This function reads the xml file, fixes the bad tags in columbia xml + files and returns a BeautifulSoup object + + :param filepath: path to xml file + :return: BeautifulSoup object of parsed content + """ + with open(filepath, "r", encoding="utf-8") as f: + file_content = f.read() + # Sometimes the opening and ending tags mismatch (e.g. 
ed7c6b39dcb29c9c.xml) + file_content = file_content.replace( + "", "" + ) + # Fix opinion with invalid attribute + if "" in file_content: + file_content = file_content.replace( + "", "" + ) + file_content = file_content.replace("", "").replace( + "", "" + ) + return BeautifulSoup(file_content, "lxml") + + +def add_floating_opinion( + opinions: list, floating_content: list, opinion_order: int +) -> list: + """We have found floating opinions in the bs object; we keep the opinion + content as a new opinion + + :param opinions: a list with opinions found + :param floating_content: content that is not in known non-opinion tags + :param opinion_order: opinion position + :return: updated list of opinions + """ + op_type = "opinion" + if opinions: + if opinions[-1].get("type"): + # Use type of previous opinion if exists + op_type = opinions[-1].get("type") + + # Get rid of double spaces from floating content + opinion_content = re.sub( + " +", " ", "\n".join(floating_content) + ).strip() # type: str + if opinion_content: + opinions.append( + { + "opinion": opinion_content, + "order": opinion_order, + "byline": "", + "type": op_type, + } + ) + return opinions + + +def extract_columbia_opinions( + outer_opinion: BeautifulSoup, +) -> list[Optional[dict]]: + """We extract all possible opinions from BeautifulSoup, with and without + author, and we create new opinions if floating content exists (content that + is not explicitly defined within an opinion tag or doesn't have an author) + + :param outer_opinion: element containing all xml tags + :return: list of opinion dicts + """ + opinions: list = [] + floating_content = [] + order = 0 + + # We iterate all content to look for all possible opinions + for i, content in enumerate(outer_opinion): # type: int, Tag + if isinstance(content, NavigableString): + # We found a raw string, store it + floating_content.append(str(content)) + else: + if content.name in SIMPLE_TAGS + [ + "citation_line", + "opinion_byline", + "dissent_byline", + "concurrence_byline", + ]: + # Ignore these tags; they will be processed later + continue + elif content.name in [ + "opinion_text", + "dissent_text", + "concurrence_text", + ]: + if floating_content: + # We have found an opinion, but there is floating + # content, we create a dict with the opinion using the + # floating content with default type = "opinion" + opinions = add_floating_opinion( + opinions, floating_content, order + ) + floating_content = [] + + byline = content.find_previous_sibling() + opinion_author = "" + if byline and "_byline" in byline.name: + opinion_author = byline.get_text() + + opinion_content = re.sub( + " +", " ", content.decode_contents() + ).strip() + if opinion_content: + # Now we create a dict with current opinion + opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": opinion_author, + "type": content.name.replace("_text", ""), + } + ) + order = order + 1 + + else: + if content.name not in SIMPLE_TAGS + ["syllabus"]: + # We store content that is not inside a _text tag and is + # not in one of the known non-opinion tags + floating_content.append(str(content)) + + # Combine any remaining floating content into one more opinion.
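+ # Illustrative shape of an extracted entry at this point (sample values are + # invented): {"opinion": "<p>...</p>", "order": 1, "byline": "", + # "type": "dissent"}; map_opinion_types() later converts "type" into the + # model choices, e.g. "dissent" becomes "040dissent".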
+ if floating_content: + # If we have gone through all the found opinions and we still + # have floating content left over, we create a new opinion with the + # last type of opinion + opinions = add_floating_opinion(opinions, floating_content, order) + return opinions + + +def is_per_curiam_opinion( + content: Optional[str], byline: Optional[str] +) -> bool: + """Check if opinion author is per curiam + :param content: opinion content + :param byline: opinion text author + :return: True if opinion author is per curiam + """ + if byline and "per curiam" in byline[:1000].lower(): + return True + if content and "per curiam" in content[:1000].lower(): + return True + return False + + +def merge_opinions( + opinions: list, content: list, current_order: int +) -> tuple[list, int]: + """Merge the new content into the last opinion if both are of the same type, or + create a new opinion if merging is not possible + + :param opinions: list of opinions that is being updated constantly + :param content: list of opinions without an author + :param current_order: opinion position + :return: updated list of opinions + """ + + # We check if the previously stored opinion matches the type of the + # content, and we store the opinion dict temporarily + relevant_opinions = ( + [opinions[-1]] + if opinions and opinions[-1]["type"] == content[0].get("type") + else [] + ) + + if relevant_opinions: + relevant_opinions[-1]["opinion"] += "\n" + "\n".join( + [f.get("opinion") for f in content if f.get("opinion")] + ) + + else: + # No relevant opinions found, create a new opinion with the content + opinion_content = "\n".join( + [f.get("opinion") for f in content if f.get("opinion")] + ) + new_opinion = { + "byline": None, + "type": content[0].get("type"), + "opinion": opinion_content, + "order": current_order, + "per_curiam": is_per_curiam_opinion(opinion_content, None), + } + opinions.append(new_opinion) + current_order = current_order + 1 + + return opinions, current_order + + +def process_extracted_opinions(extracted_opinions: list) -> list: + """We read the data extracted by the extract_columbia_opinions function to merge all + possible floating opinions (content that is not explicitly defined within an opinion + tag or doesn't have an author) + + :param extracted_opinions: list of opinions obtained from xml file + :return: a list with extracted and processed opinions + """ + + opinions: list = [] + authorless_content = [] + order = 0 + + for i, found_content in enumerate(extracted_opinions, start=1): + byline = found_content.get("byline") + if not byline: + # Opinion has no byline, store opinion content + authorless_content.append(found_content) + + if byline: + # Opinion has byline, get opinion type and content + opinion_type = found_content.get("type") + opinion_content = found_content.get("opinion", "") + # Store content that doesn't match the current opinion type + alternative_authorless_content = [ + content + for content in authorless_content + if content.get("type") != opinion_type + ] + # Keep content that matches the current type + authorless_content = [ + op_content + for op_content in authorless_content + if op_content.get("type") == opinion_type + ] + + if alternative_authorless_content: + # Keep floating text that is not of the same type; + # we need to create a separate opinion for it, + # for example: in 2713f39c5a8e8684.xml we have an opinion + # without an author, and the next opinion with an author is + # a dissent opinion, we can't combine both + opinions, order = merge_opinions( + opinions, alternative_authorless_content, order + )
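+ # Worked example (hypothetical): if authorless_content held an "opinion" + # fragment and a "dissent" fragment while the current byline belongs to a + # dissent, the "opinion" fragment was split into its own opinion by + # merge_opinions() above, and the "dissent" fragment is prepended to the + # authored dissent below.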
+ + opinion_content = ( + "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("type") == opinion_type + ] + ) + + "\n\n" + + opinion_content + ) + + # Add new opinion + new_opinion = { + "byline": byline, + "type": opinion_type, + "opinion": opinion_content, + "order": order, + "per_curiam": is_per_curiam_opinion(opinion_content, byline), + } + + opinions.append(new_opinion) + order = order + 1 + authorless_content = [] + + if len(extracted_opinions) == i and authorless_content: + # If is the last opinion, and we still have opinions without + # byline, create an opinion without an author and the contents + # that couldn't be merged + opinions, order = merge_opinions( + opinions, authorless_content, order + ) + + return opinions + + +def map_opinion_types(opinions=None) -> None: + """Map opinion type to model field choice + + :param opinions: a list that contains all opinions as dict elements + :return: None + """ + + if opinions is None: + opinions = [] + lead = False + for op in opinions: + op_type = op.get("type") + # Only first opinion with "opinion" type is a lead opinion, the next + # opinion with "opinion" type is an addendum + if not lead and op_type and op_type == "opinion": + lead = True + op["type"] = "020lead" + continue + elif lead and op_type and op_type == "opinion": + op["type"] = "050addendum" + elif op_type and op_type == "dissent": + op["type"] = "040dissent" + elif op_type and op_type == "concurrence": + op["type"] = "030concurrence" + + +# TODO ------------------------ remove until here ------------------------------- def match_text_lists( file_opinions_list: List[Any], cl_opinions_list: List[Any] ) -> dict[int, int]: """Generate matching lists above threshold + :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions :return: Matches if found or empty dict @@ -50,10 +388,11 @@ def match_text_lists( return matches -def get_opinion_content( +def get_opinions_cleaned_content( cluster_id, -) -> tuple[Optional[FieldFile], list[dict], int, bool]: - """Get the opinions content for a cluster object +) -> tuple[Optional[str], list[dict], int, bool]: + """Get cleaned opinions content for a cluster object + :param cluster_id: Cluster ID for a set of opinions :return: (xml path, list of extracted opinions, start position, True if combined opinions exists in cluster) @@ -67,380 +406,108 @@ def get_opinion_content( type="010combined" ) xml_path = None - combined_opinion = False + cluster_has_combined_opinion = False if combined_opinions_cluster: # the combined opinion will be displayed at beginning start_position = combined_opinions_cluster.count() - combined_opinion = True + cluster_has_combined_opinion = True else: # we don't have combined opinions, we start ordering from 0 to n start_position = 0 for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): if op.local_path and not xml_path: - # We store the field because we are using S3 for storage and that backend - # doesn't support absolute paths - xml_path = op.local_path - content = None - if len(op.html_with_citations) > 1: - content = op.html_with_citations - elif len(op.html_columbia) > 1: - content = op.html_columbia - elif len(op.html_lawbox) > 1: - content = op.html_lawbox - elif len(op.plain_text) > 1: - content = op.plain_text - elif len(op.html) > 1: - content = op.html - elif len(op.xml_harvard) > 1: - content = op.xml_harvard - if content: - soup = BeautifulSoup(content, features="html.parser") - prep_text = re.sub( - r"[^a-zA-Z0-9 ]", "", 
soup.getText(separator=" ").lower() - ) - prep_text = re.sub(" +", " ", prep_text) - cl_cleaned_opinions.append( - { - "id": op.id, - "byline": op.author_str, - "type": op.type, - "opinion": prep_text, - "order": i, - } - ) - - return xml_path, cl_cleaned_opinions, start_position, combined_opinion - + xml_path = str(op.local_path) -def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: - """Convert xml data into dict - :param xml_filepath: path of xml file - :param xml_dir: absolute path to the directory with columbia xml files - :return: dict with data - """ - - SIMPLE_TAGS = [ - "attorneys", - "caption", - "citation", - "court", - "date", - "docket", - "hearing_date", - "panel", - "posture", - "reporter_caption", - ] - - data = {} # type: dict - - if "/home/mlissner/columbia/opinions/" in str(xml_filepath): - filepath = str(xml_filepath).replace( - "/home/mlissner/columbia/opinions/", "" - ) - # fix file path temporarily - new_xml_filepath = os.path.join(xml_dir, filepath) - else: - logger.info(f"Can't fix xml file path: {xml_filepath}") - raise FileNotFoundError - - with open(new_xml_filepath, "r", encoding="utf-8") as f: - file_content = f.read() + content = None - data["unpublished"] = False + # We can only use columbia's content to infer the ordering + if len(op.html_columbia) > 1: + content = op.html_columbia - if "" in file_content: - file_content = file_content.replace( - "", "" - ) - file_content = file_content.replace("", "").replace( - "", "" + if not content: + raise EmptyOpinionException( + "There is no content in html_columbia field" ) - data["unpublished"] = True + soup = BeautifulSoup(content, features="html.parser") + opinion_text = soup.getText(separator=" ", strip=True) + prep_text = re.sub( + " +", " ", " ".join(opinion_text.split("\n")) + ).strip() + prep_text = re.sub(r"[^a-zA-Z0-9 ]", "", prep_text.lower()) + + cl_cleaned_opinions.append( + { + "id": op.id, + "byline": op.author_str, + "type": op.type, + "opinion": prep_text, + "order": i, + } + ) - # Sometimes opening and ending tag mismatch (e.g. 
c6b39dcb29c9c.xml) - file_content = file_content.replace( - "", "" + return ( + xml_path, + cl_cleaned_opinions, + start_position, + cluster_has_combined_opinion, ) - soup = BeautifulSoup(file_content, "lxml") - - # Find the outer tag to have all elements inside - find_opinion = soup.find("opinion") - - step_one_opinions = [] # type: list - opinions = [] # type: list - order = 0 - - if find_opinion: - untagged_content = [] - # We iterate all content, with and without tags - # STEP 1: Extract all content in multiple dict elements - for i, content in enumerate(find_opinion): # type: int, Tag - if type(content) == NavigableString: - # We found a raw string, store it - untagged_content.append(str(content)) +def fix_filepath(filepath: str) -> str: + """Fix filepath from file field - else: - if content.name in SIMPLE_TAGS + [ - "citation_line", - "opinion_byline", - "dissent_byline", - "concurrence_byline", - ]: - # Ignore these tags, it will be processed later - continue - elif content.name in [ - "opinion_text", - "dissent_text", - "concurrence_text", - ]: - if untagged_content: - # We found something other than a navigable string that is - # not an opinion, but now we have found an opinion, - # let's create this content first - - # default type - op_type = "opinion" - if step_one_opinions: - if step_one_opinions[-1].get("type"): - # use type of previous opinion if exists - op_type = step_one_opinions[-1].get("type") - - # Get rid of double spaces - opinion_content = re.sub( - " +", " ", "\n".join(untagged_content) - ).strip() # type: str - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": "", - "type": op_type, - } - ) - order = order + 1 - untagged_content = [] - - byline = content.find_previous_sibling() - opinion_author = "" - if byline and "_byline" in byline.name: - opinion_author = byline.get_text() - - opinion_content = re.sub( - " +", " ", content.decode_contents() - ).strip() - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": opinion_author, - "type": content.name.replace("_text", ""), - } - ) - order = order + 1 - - else: - # Content not inside _text tag, we store it - untagged_content.append(str(content)) - - if untagged_content: - # default type - op_type = "opinion" - if step_one_opinions: - if step_one_opinions[-1].get("type"): - # use type of previous opinion if exists - op_type = step_one_opinions[-1].get("type") - - opinion_content = re.sub( - " +", " ", "\n".join(untagged_content) - ).strip() - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": "", - "type": op_type, - } - ) + :param filepath: path from file field + :return: new file path + """ + if "/home/mlissner/columbia/opinions/" in filepath: + filepath = filepath.replace("/home/mlissner/columbia/opinions/", "") + return filepath - # Step 2: Merge found content in the xml file - new_order = 0 - authorless_content = [] - - for i, found_content in enumerate(step_one_opinions, start=1): - byline = found_content.get("byline") - if not byline: - # Opinion has no byline, store it - authorless_content.append(found_content) - - if byline: - # Opinion has byline - opinion_type = found_content.get("type") - opinion_content = found_content.get("opinion", "") - # Store content that doesn't match the current type - alternative_authorless_content = [ - z - for z in authorless_content - if z.get("type") != opinion_type - ] - # Keep content that matches 
the current type - authorless_content = [ - z - for z in authorless_content - if z.get("type") == opinion_type - ] - - if alternative_authorless_content: - # Keep floating text that are not from the same type, - # we need to create a separate opinion for those, - # for example: in 2713f39c5a8e8684.xml we have an opinion - # without an author, and the next opinion with an author is - # a dissent opinion, we can't combine both - - # We check if the previous stored opinion matches the type of the - # content - relevant_opinions = ( - [opinions[-1]] - if opinions - and opinions[-1]["type"] - == alternative_authorless_content[0].get("type") - else [] - ) - if relevant_opinions: - previous_opinion = relevant_opinions[-1] - if previous_opinion.get( - "type" - ) == alternative_authorless_content[0].get("type"): - # Merge last opinion with previous opinion, it probably - # belongs the same author - relevant_opinions[-1][ - "opinion" - ] += "\n" + "\n".join( - [ - f.get("opinion") - for f in alternative_authorless_content - if f.get("opinion") - ] - ) - authorless_content = [] +def get_opinions_columbia_file(xml_filepath: str) -> list: + """Get opinions from columbia xml file and convert it into dict - else: - # No relevant opinions found, create a new opinion - new_opinion = { - "byline": None, - "type": alternative_authorless_content[0].get( - "type" - ), - "opinion": "\n".join( - [ - f.get("opinion") - for f in alternative_authorless_content - if f.get("opinion") - ] - ), - "order": new_order, - } - new_order = new_order + 1 - opinions.append(new_opinion) - - # Add new opinion - new_opinion = { - "byline": byline, - "type": opinion_type, - "opinion": "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("type") == opinion_type - ] - ) - + "\n\n" - + opinion_content, - "order": new_order, - } - - opinions.append(new_opinion) - new_order = new_order + 1 - authorless_content = [] - - if len(step_one_opinions) == i and authorless_content: - # If is the last opinion, and we still have opinions without - # byline, create an opinion without an author and the contents - # that couldn't be merged - - # We check if the previous stored opinion matches the type of the - # content - relevant_opinions = ( - [opinions[-1]] - if opinions - and opinions[-1]["type"] - == authorless_content[0].get("type") - else [] - ) + :param xml_filepath: path of xml file + :return: dict with data + """ + soup = read_xml_to_soup(xml_filepath) - if relevant_opinions: - previous_opinion = relevant_opinions[-1] - if previous_opinion.get("type") == authorless_content[ - 0 - ].get("type"): - # Merge last opinion with previous opinion, it probably - # belongs the same author - relevant_opinions[-1]["opinion"] += "\n" + "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("opinion") - ] - ) + # Find the outer tag to have all elements inside + outer_opinion = soup.find("opinion") - else: - # Create last floating opinion - new_opinion = { - "byline": None, - "type": authorless_content[0].get("type"), - "opinion": "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("opinion") - ] - ), - "order": new_order, - } - opinions.append(new_opinion) + extracted_opinions = extract_columbia_opinions(outer_opinion) + opinions = process_extracted_opinions(extracted_opinions) + map_opinion_types(opinions) for op in opinions: opinion_content = op.get("opinion") - opinion_content = BeautifulSoup( - opinion_content, "html.parser" - ).getText() - opinion_content = re.sub(r"[^a-zA-Z0-9 ]", 
"", opinion_content.lower()) - op["opinion"] = opinion_content + soup = BeautifulSoup(opinion_content, "html.parser") + opinion_text = soup.getText(separator=" ", strip=True) + opinion_text = re.sub( + " +", " ", " ".join(opinion_text.split("\n")) + ).strip() + cleaned_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_text.lower()) + op["opinion"] = cleaned_opinion return opinions -def run_harvard(start_id: int, end_id: int): - """ - We assume that harvard data is already ordered, we just need to fill the order +def sort_harvard_opinions(start_id: int, end_id: int) -> None: + """We assume that harvard data is already ordered, we just need to fill the order field in each opinion + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value + :return: None """ # Get all harvard clusters with more than one opinion clusters = ( OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source="U") + .filter(opinions_count__gt=1, source__in=VALID_HARVARD_SOURCES) .order_by("id") ) @@ -478,18 +545,19 @@ def run_harvard(start_id: int, end_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int, end_id: int, xml_dir: str): - """ - Update opinion order for columbia clusters +def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: + """Update opinion ordering for columbia clusters + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value :param xml_dir: absolute path to the directory with columbia xml files + :return: None """ # Get all columbia cluster ids with more than one opinion clusters = ( OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source="Z") + .filter(opinions_count__gt=1, source__in=VALID_COLUMBIA_SOURCES) .order_by("id") .values_list("id", flat=True) ) @@ -502,37 +570,53 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") - ( - xml_path, - cl_cleaned_opinions, - start_position, - combined_opinion, - ) = get_opinion_content(cluster_id) - - columbia_opinions = None + + try: + ( + xml_path, + cl_cleaned_opinions, + start_position, + cluster_has_combined_opinion, + ) = get_opinions_cleaned_content(cluster_id) + except EmptyOpinionException: + logger.warning( + f"At least one of the opinions from cluster id: {cluster_id} is empty." 
+ ) + continue + + extracted_columbia_opinions = None if xml_path: - try: - columbia_opinions = get_opinions_columbia_xml( - xml_path, xml_dir - ) - except FileNotFoundError: + fixed_xml_filepath = os.path.join(xml_dir, fix_filepath(xml_path)) + + if not os.path.exists(fixed_xml_filepath): logger.warning( - f"Xml file not found in {xml_path}, cluster id: {cluster_id}" + f"Xml file not found in {fixed_xml_filepath}, cluster id: {cluster_id}" + ) + continue + + try: + extracted_columbia_opinions = get_opinions_columbia_file( + fixed_xml_filepath ) + except UnicodeDecodeError: + logger.warning(f"Cannot decode file: {fixed_xml_filepath}") continue - if cl_cleaned_opinions and columbia_opinions: + if cl_cleaned_opinions and extracted_columbia_opinions: + columbia_opinions_content = [ + op.get("opinion") + for op in extracted_columbia_opinions + if op.get("opinion") + ] + cl_opinions_content = [ + op.get("opinion") + for op in cl_cleaned_opinions + if op.get("opinion") + ] + matches = match_text_lists( - [ - op.get("opinion") - for op in columbia_opinions - if op.get("opinion") - ], - [ - op.get("opinion") - for op in cl_cleaned_opinions - if op.get("opinion") - ], + columbia_opinions_content, + cl_opinions_content, ) if matches: @@ -557,7 +641,7 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): failed = False for file_pos, cl_pos in matches.items(): # file_pos is the correct index to find the opinion id to update - file_opinion = columbia_opinions[file_pos] + file_opinion = extracted_columbia_opinions[file_pos] # the order was calculated using the xml file file_order = file_opinion.get("order") + start_position cl_opinion = cl_cleaned_opinions[cl_pos] @@ -587,7 +671,7 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): failed = True break - if combined_opinion and not failed: + if cluster_has_combined_opinion and not failed: combined_opinions_cluster = Opinion.objects.filter( cluster_id=cluster_id, type="010combined" ).order_by("id") @@ -628,6 +712,7 @@ def add_arguments(self, parser): parser.add_argument( "--xml-dir", + default="/opt/courtlistener/_columbia", required=False, help="The absolute path to the directory with columbia xml files", ) @@ -653,11 +738,15 @@ def handle(self, *args, **options): ) return + if not options["process_harvard"] and not options["process_columbia"]: + print("One option required: process-harvard or process-columbia") + return + if options["process_harvard"]: - run_harvard(options["start_id"], options["end_id"]) + sort_harvard_opinions(options["start_id"], options["end_id"]) if options["process_columbia"] and options["xml_dir"]: - run_columbia( + sort_columbia_opinions( options["start_id"], options["end_id"], options["xml_dir"] ) From f808b95b68487580b3d24be400afee91dcd4f938 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 30 Nov 2023 11:43:58 -0600 Subject: [PATCH 029/372] fix(opinions_order): code refactored NOTE: functions found in columbia_utils.py and utils.py, were temporarily added in the command,when the necessary changes are combined we need to remove the functions and import them from the utils. 
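To make the new matching contract concrete, here is a minimal usage sketch (the strings and the resulting pairs are invented for illustration):

    # Hypothetical usage of match_opinion_lists() as introduced below.
    file_ops = ["the opinion of the court was delivered by smith ...",
                "jones, dissenting ..."]
    cl_ops = ["jones, dissenting ...",
              "the opinion of the court was delivered by smith ..."]
    matches = match_opinion_lists(file_ops, cl_ops)
    # e.g. {0: 1, 1: 0}: file opinion 0 pairs with cl opinion 1, and so on.
    # For each file opinion, the best cl candidate (argmax of the similarity
    # scores) is kept unless cosine_sim < 0.60 and percent_match < 60.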
---
 .../commands/update_opinions_order.py         | 231 +++++++++++-------
 1 file changed, 147 insertions(+), 84 deletions(-)

diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py
index ae931ba4b7..5b86c98130 100644
--- a/cl/corpus_importer/management/commands/update_opinions_order.py
+++ b/cl/corpus_importer/management/commands/update_opinions_order.py
@@ -1,12 +1,13 @@
 import os.path
 import re
-from typing import Any, List, Optional
+from typing import Any, Optional

 from bs4 import BeautifulSoup, NavigableString, Tag
 from django.core.management import BaseCommand
+from django.db import transaction
 from django.db.models import Count

-from cl.corpus_importer.utils import similarity_scores
+from cl.corpus_importer.utils import compare_documents, similarity_scores
 from cl.lib.command_utils import logger
 from cl.lib.string_diff import get_cosine_similarity
 from cl.search.models import SOURCES, Opinion, OpinionCluster
@@ -24,7 +25,6 @@

 # TODO remove the functions below and import them from utils.py and columbia_utils.py when those changes get merged

-
 SIMPLE_TAGS = [
     "attorneys",
     "caption",
@@ -347,47 +347,86 @@ def map_opinion_types(opinions=None) -> None:
             op["type"] = "030concurrence"


-# TODO ------------------------ remove until here -------------------------------
-
-
-def match_text_lists(
-    file_opinions_list: List[Any], cl_opinions_list: List[Any]
+def match_opinion_lists(
+    file_opinions_list: list[Any], cl_opinions_list: list[Any]
 ) -> dict[int, int]:
-    """Generate matching lists above threshold
+    """Try to match the opinions in two lists and generate a dict with the
+    positions of the matching opinions
+
+    Non-alphanumeric and non-whitespace characters are removed from the
+    lowercased texts; this normalizes both texts so they can be compared on
+    equal terms
+
+    get_cosine_similarity works great when both texts are almost the same,
+    with only very small variations
+
+    Cosine similarity sometimes fails when small variations in the text, such
+    as parties, attorneys, case name, or court, are included in the content of
+    the opinion. compare_documents() checks the percentage of the file opinion
+    text that is contained in the courtlistener opinion; a large percentage
+    means that almost all of the file opinion is in the courtlistener opinion,
+    although the courtlistener opinion may contain some additional data in the
+    opinion content (such as case name, parties, etc.)
+
+    compare_documents works well when the opinion from the file is a subset of
+    the opinion in CL; the percentage represents how much of the file opinion
+    is in the opinion from cl (content in the cl opinion can have other data
+    in the body like posture, attorneys, etc.; e.g. in cluster id: 7643871 we
+    have the posture and the opinion text, but in the xml file we only have
+    the opinion text, cosine_sim: 0.1639075094124459 and percent_match: 73)
+
+    Sometimes one algorithm performs better than the other; this is due to
+    additional text, such as editor's notes, the author, a page number, or the
+    posture added to the opinion
+
+    The key is the opinion position from the file, the value is the opinion
+    position from the cl opinion, e.g. matches {0: 1, 1: 2} means file
+    opinion 0 matches cl opinion 1, and file opinion 1 matches cl opinion 2

     :param file_opinions_list: Opinions from file
     :param cl_opinions_list: CL opinions
     :return: Matches if found or empty dict
     """
-    # We import this here to avoid a circular import
-    from cl.corpus_importer.management.commands.harvard_opinions import (
-        compare_documents,
-    )

     scores = similarity_scores(file_opinions_list, cl_opinions_list)

     matches = {}
     for i, row in enumerate(scores):
         j = row.argmax()  # type: ignore
-        # Lower threshold for small opinions.
-        if (
-            get_cosine_similarity(file_opinions_list[i], cl_opinions_list[j])
-            < 0.60
-        ):
-            continue

-        percent_match = compare_documents(
-            file_opinions_list[i], cl_opinions_list[j]
+        file_opinion = re.sub(
+            r"[^a-zA-Z0-9 ]", "", file_opinions_list[i].lower()
         )
-        if percent_match < 60:
+        cl_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", cl_opinions_list[j].lower())
+
+        cosine_sim = get_cosine_similarity(file_opinion, cl_opinion)
+
+        percent_match = compare_documents(file_opinion, cl_opinion)
+
+        if cosine_sim < 0.60 and percent_match < 60:
             continue
+
         matches[i] = j

-    # Key is opinion position from file, Value is opinion position from cl opinion
-    # e.g. matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file
-    # opinion and 2 is cl opinion
     return matches


+def clean_opinion_content(text: str) -> str:
+    """Clean opinion content
+
+    :param text: text to clean
+    :return: cleaned text
+    """
+
+    # Replace line breaks with spaces and get rid of double spaces
+    text = re.sub(" +", " ", " ".join(text.split("\n"))).strip()
+
+    # Remove non-alphanumeric and non-whitespace characters from lowercased text
+    return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower())
+
+
+# TODO ------------------------ remove until here -------------------------------
+
+
 def get_opinions_cleaned_content(
     cluster_id,
 ) -> tuple[Optional[str], list[dict], int, bool]:
@@ -432,10 +471,7 @@ def get_opinions_cleaned_content(

         soup = BeautifulSoup(content, features="html.parser")
         opinion_text = soup.getText(separator=" ", strip=True)
-        prep_text = re.sub(
-            " +", " ", " ".join(opinion_text.split("\n"))
-        ).strip()
-        prep_text = re.sub(r"[^a-zA-Z0-9 ]", "", prep_text.lower())
+        prep_text = clean_opinion_content(opinion_text)

         cl_cleaned_opinions.append(
             {
@@ -485,10 +521,7 @@ def get_opinions_columbia_file(xml_filepath: str) -> list:
         opinion_content = op.get("opinion")
         soup = BeautifulSoup(opinion_content, "html.parser")
         opinion_text = soup.getText(separator=" ", strip=True)
-        opinion_text = re.sub(
-            " +", " ", " ".join(opinion_text.split("\n"))
-        ).strip()
-        cleaned_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_text.lower())
+        cleaned_opinion = clean_opinion_content(opinion_text)
         op["opinion"] = cleaned_opinion
     return opinions

@@ -545,6 +578,78 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None:
         logger.info(msg=f"Opinions reordered for cluster id: {oc.id}")


+def update_opinions(
+    cluster_id: int,
+    cl_opinions: list,
+    columbia_opinions: list,
+    matches: dict,
+    cluster_has_combined_opinion: bool,
+    start_position: int,
+):
+    """Update opinions with correct order
+
+    :param cluster_id: the id of the cluster whose opinions are updated
+    :param cl_opinions: a list with cleaned opinions from cl
+    :param columbia_opinions: an ordered list with cleaned opinions from the xml file
+    :param matches: a dict with the matches between the opinions of both lists
+    :param cluster_has_combined_opinion: True if the cluster has combined opinions
+    :param start_position: the number from where the order should begin for
+    non-combined
opinions + :return: None + """ + update_failed = False + + with transaction.atomic(): + for file_pos, cl_pos in matches.items(): + # file_pos is the correct index to find the opinion id to update + file_opinion = columbia_opinions[file_pos] + # the order was calculated using the xml file + file_order = file_opinion.get("order") + start_position + cl_opinion = cl_opinions[cl_pos] + opinion_id_to_update = cl_opinion.get("id") + + if opinion_id_to_update: + try: + # Update opinion order + op = Opinion.objects.get(id=opinion_id_to_update) + op.order = file_order + op.save() + except Opinion.DoesNotExist: + # This should not happen, but it is better to be + # cautious + logger.warning( + f"We can't update opinion, opinion doesn't exist " + f"with id: {opinion_id_to_update}" + ) + update_failed = True + break + + if cluster_has_combined_opinion and not update_failed: + combined_opinions_cluster = Opinion.objects.filter( + cluster_id=cluster_id, type="010combined" + ).order_by("id") + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate( + combined_opinions_cluster + ): + cluster_op.order = opinion_order + cluster_op.save() + + if update_failed: + # There was an error updating an opinion, rollback all changes for + # cluster's opinions + logger.warning( + f"There was an error updating the order of opinions of the " + f"cluster id: {cluster_id}" + ) + transaction.set_rollback(True) + else: + logger.info( + f"The order of opinions was updated, cluster id: {cluster_id}" + ) + + def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: """Update opinion ordering for columbia clusters @@ -614,7 +719,7 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: if op.get("opinion") ] - matches = match_text_lists( + matches = match_opinion_lists( columbia_opinions_content, cl_opinions_content, ) @@ -638,57 +743,15 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: # Go to next cluster id continue - failed = False - for file_pos, cl_pos in matches.items(): - # file_pos is the correct index to find the opinion id to update - file_opinion = extracted_columbia_opinions[file_pos] - # the order was calculated using the xml file - file_order = file_opinion.get("order") + start_position - cl_opinion = cl_cleaned_opinions[cl_pos] - opinion_id_to_update = cl_opinion.get("id") - - if opinion_id_to_update: - try: - # Save opinion - op = Opinion.objects.get(id=opinion_id_to_update) - op.order = file_order - op.save() - logger.info( - f"Cluster id processed: {cluster_id} Update opinion id: {opinion_id_to_update} with position: {file_order}" - ) - except Opinion.DoesNotExist: - logger.warning( - f"We can't update opinion, opinion doesn't exist with " - f"id: {opinion_id_to_update}" - ) - failed = True - break - else: - logger.warning( - f"We can't update opinion, empty opinion id " - f"from cluster: {cluster_id}" - ) - failed = True - break - - if cluster_has_combined_opinion and not failed: - combined_opinions_cluster = Opinion.objects.filter( - cluster_id=cluster_id, type="010combined" - ).order_by("id") - - # Show combined opinions at beginning - for opinion_order, cluster_op in enumerate( - combined_opinions_cluster - ): - cluster_op.order = opinion_order - cluster_op.save() - - else: - # No matches found - logger.warning( - f"Failed to match opinions from cluster id: {cluster_id}" + # Update all opinions order + update_opinions( + cluster_id, + cl_cleaned_opinions, + extracted_columbia_opinions, + matches, + 
cluster_has_combined_opinion, + start_position, ) - continue class Command(BaseCommand): From f928aa021fe9de812f9e82b64a044582b5ffda78 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 19 Feb 2024 18:13:46 -0600 Subject: [PATCH 030/372] fix(opinion_order): update poetry.lock and pyproject.toml --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 24a1c45791..25db969843 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1062,6 +1062,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5105,4 +5116,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "d0cb9ebf26ba111318df8c00976f71ad6b18ffc1aafab1df3b506bfe5128611d" +content-hash = "a8dfd3edc2209cb2d357696b751508ebd0c249be0b1b408f2f7225884a5e7b2a" diff --git a/pyproject.toml b/pyproject.toml index 32afda8f5f..e8d88a61f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ httpx = {extras = ["http2"], version = "^0.26.0"} django-model-utils = "^4.3.1" juriscraper = "*" django-permissions-policy = "^4.19.0" +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From d46b42fd39b6abacf301ae3ce46ed090d5cb5446 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 19 Feb 2024 18:21:48 -0600 Subject: [PATCH 031/372] fix(opinion_order): rename migrations --- .../{0024_order_opinions.py => 0027_order_opinions.py} | 2 +- .../{0024_order_opinions.sql => 0027_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0024_order_opinions.py => 0027_order_opinions.py} (98%) rename cl/search/migrations/{0024_order_opinions.sql => 0027_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0024_order_opinions.py b/cl/search/migrations/0027_order_opinions.py similarity index 98% rename from cl/search/migrations/0024_order_opinions.py rename to cl/search/migrations/0027_order_opinions.py index 1abaed4d76..e1c602e2e5 100644 --- a/cl/search/migrations/0024_order_opinions.py +++ b/cl/search/migrations/0027_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0023_add_docket_sources_noop"), + ("search", "0026_drop_docket_unique_together_and_more"), ] operations = [ diff --git a/cl/search/migrations/0024_order_opinions.sql b/cl/search/migrations/0027_order_opinions.sql similarity index 100% rename from cl/search/migrations/0024_order_opinions.sql rename to cl/search/migrations/0027_order_opinions.sql From cefb8482ed586e65526f59818901eca56ca26e7d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 6 May 2024 18:12:54 -0600 Subject: [PATCH 032/372] feat(opinion_order): resolve merge conflict, rename migrations --- ..._order_opinions.py => 0031_order_opinions.py} | 2 +- ...rder_opinions.sql => 0031_order_opinions.sql} | 0 poetry.lock | 16 +++++++++++++--- 3 files changed, 14 
insertions(+), 4 deletions(-) rename cl/search/migrations/{0027_order_opinions.py => 0031_order_opinions.py} (98%) rename cl/search/migrations/{0027_order_opinions.sql => 0031_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0027_order_opinions.py b/cl/search/migrations/0031_order_opinions.py similarity index 98% rename from cl/search/migrations/0027_order_opinions.py rename to cl/search/migrations/0031_order_opinions.py index e1c602e2e5..9e7774203d 100644 --- a/cl/search/migrations/0027_order_opinions.py +++ b/cl/search/migrations/0031_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0026_drop_docket_unique_together_and_more"), + ("search", "0030_recapdocument_pacer_doc_id_idx"), ] operations = [ diff --git a/cl/search/migrations/0027_order_opinions.sql b/cl/search/migrations/0031_order_opinions.sql similarity index 100% rename from cl/search/migrations/0027_order_opinions.sql rename to cl/search/migrations/0031_order_opinions.sql diff --git a/poetry.lock b/poetry.lock index f22583b490..109cadc2d3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1062,6 +1062,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2467,7 +2478,6 @@ files = [ {file = "lxml-5.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9e2addd2d1866fe112bc6f80117bcc6bc25191c5ed1bfbcf9f1386a884252ae8"}, {file = "lxml-5.2.1-cp37-cp37m-win32.whl", hash = "sha256:f51969bac61441fd31f028d7b3b45962f3ecebf691a510495e5d2cd8c8092dbd"}, {file = "lxml-5.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c"}, - {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3e183c6e3298a2ed5af9d7a356ea823bccaab4ec2349dc9ed83999fd289d14d5"}, {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:804f74efe22b6a227306dd890eecc4f8c59ff25ca35f1f14e7482bbce96ef10b"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08802f0c56ed150cc6885ae0788a321b73505d2263ee56dad84d200cab11c07a"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f8c09ed18ecb4ebf23e02b8e7a22a05d6411911e6fabef3a36e4f371f4f2585"}, @@ -5259,4 +5269,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "994213014ffbb4387604c85fddd76e01112f4e3b66a1be6bc77f601b5b1de1b8" +content-hash = "c6a4dd1a9c6ecf961e254a3d6d0387f4d5e6f6fdb4181c33e2c55174e68d4454" From 0b93a8c6f1b41158ddafc04ed593ce3a58686c24 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Tue, 14 May 2024 19:27:47 -0500 Subject: [PATCH 033/372] 
feat(cl_scrape_opinions): ingest more Juriscraper fields Partially solves #4042 Ingest "lower_courts" into Docket.appeal_from_str Ingest "dispositions" into OpinionCluster.disposition Ingest "authors" into Opinion.author_str Ingest "joined_by" into Opinion.joined_by Ingest "per_curiam" into Opinion.per_curiam Ingest "types" into Opinion.type Last 4 fields are not supported in Juriscraper as of yet, but the changes proposed keep the default behavior --- cl/scrapers/management/commands/cl_scrape_opinions.py | 7 ++++++- cl/scrapers/utils.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cl/scrapers/management/commands/cl_scrape_opinions.py b/cl/scrapers/management/commands/cl_scrape_opinions.py index 1ea37385b8..d3387a7b72 100644 --- a/cl/scrapers/management/commands/cl_scrape_opinions.py +++ b/cl/scrapers/management/commands/cl_scrape_opinions.py @@ -103,6 +103,7 @@ def make_objects( item.get("source") or Docket.SCRAPER, blocked=blocked, date_blocked=date_blocked, + appeal_from_str=item.get("lower_courts", ""), ) cluster = OpinionCluster( @@ -117,6 +118,7 @@ def make_objects( blocked=blocked, date_blocked=date_blocked, syllabus=item.get("summaries", ""), + disposition=item.get("dispositions", ""), ) cites = [item.get(key, "") for key in ["citations", "parallel_citations"]] @@ -131,9 +133,12 @@ def make_objects( url = "" opinion = Opinion( - type=Opinion.COMBINED, + type=item.get("types", Opinion.COMBINED), sha1=sha1_hash, download_url=url, + author_str=item.get("authors", ""), + joined_by_str=item.get("joined_by", ""), + per_curiam=item.get("per_curiam", False), ) cf = ContentFile(content) diff --git a/cl/scrapers/utils.py b/cl/scrapers/utils.py index bb7d47ebae..d75bada36f 100644 --- a/cl/scrapers/utils.py +++ b/cl/scrapers/utils.py @@ -295,6 +295,7 @@ def update_or_create_docket( date_blocked: date | None = None, date_argued: date | None = None, ia_needs_upload: bool | None = None, + appeal_from_str: str = "", ) -> Docket: """Look for an existing Docket and update it or create a new one if it's not found. @@ -309,6 +310,7 @@ def update_or_create_docket( :param date_blocked: The docket date_blocked if it's blocked. :param date_argued: The docket date_argued if it's an oral argument. :param ia_needs_upload: If the docket needs upload to IA, default None. + :param appeal_from_str: Name (not standardized id) of the lower level court. :return: The docket. """ @@ -320,6 +322,7 @@ def update_or_create_docket( "date_blocked": date_blocked, "date_argued": date_argued, "ia_needs_upload": ia_needs_upload, + "appeal_from_str": appeal_from_str, } docket = async_to_sync(find_docket_object)(court_id, None, docket_number) From d1a1708f363764056e4c6f9e0159e460675ad3da Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 5 Jun 2024 12:58:05 -0600 Subject: [PATCH 034/372] fix(opinion_order): update poetry.lock to solve merge conflicts --- poetry.lock | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 6d7f85852a..cbc5ec2cc3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1058,6 +1058,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5254,4 +5265,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "814ca0b0dc8db689f83e391fc58b494de48f6321085872bfaa8e37b7a7fc0e99" +content-hash = "a64d61d094d3896cb204e882ff2471b4f3b69def7416a2b50cdcedc9acf6455e" From 754d71fda6d7a12d4dfc6dddf121399d6d0582c9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 5 Jun 2024 13:06:55 -0600 Subject: [PATCH 035/372] fix(opinion_order): rename migration --- .../{0031_order_opinions.py => 0032_order_opinions.py} | 2 +- .../{0031_order_opinions.sql => 0032_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0031_order_opinions.py => 0032_order_opinions.py} (98%) rename cl/search/migrations/{0031_order_opinions.sql => 0032_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0031_order_opinions.py b/cl/search/migrations/0032_order_opinions.py similarity index 98% rename from cl/search/migrations/0031_order_opinions.py rename to cl/search/migrations/0032_order_opinions.py index 9e7774203d..b34bb01d48 100644 --- a/cl/search/migrations/0031_order_opinions.py +++ b/cl/search/migrations/0032_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0030_recapdocument_pacer_doc_id_idx"), + ("search", "0031_alter_opinion_type_alter_opinioncluster_source_noop"), ] operations = [ diff --git a/cl/search/migrations/0031_order_opinions.sql b/cl/search/migrations/0032_order_opinions.sql similarity index 100% rename from cl/search/migrations/0031_order_opinions.sql rename to cl/search/migrations/0032_order_opinions.sql From 06c814d2b4bb3624203da11d371fb4d3eb984772 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 27 Jun 2024 09:49:23 -0600 Subject: [PATCH 036/372] feat(scrape_pacer_free): run scraper by specifying court run scraper for court by specifying start and end date run scraper for court by specifying start and end date and day span --- .../commands/scrape_pacer_free_opinions.py | 258 ++++++++++++------ 1 file changed, 177 insertions(+), 81 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index e4717ec06d..d42d211e0f 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -1,12 +1,13 @@ import argparse +import datetime import os -from datetime import date, timedelta from typing import Callable, Dict, List, Optional, Tuple, cast from celery.canvas import chain from django.conf import settings from django.db.models import QuerySet from django.utils.timezone import now +from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.exceptions import PacerLoginException from juriscraper.lib.string_utils import CaseNameTweaker from requests import RequestException @@ -19,6 +20,7 @@ mark_court_done_on_date, process_free_opinion_result, ) +from cl.lib.argparse_types import valid_date from cl.lib.celery_utils import CeleryThrottle 
from cl.lib.command_utils import VerboseCommand, logger from cl.lib.pacer import map_cl_to_pacer_id, map_pacer_to_cl_id @@ -35,7 +37,7 @@ def get_next_date_range( court_id: str, span: int = 7, -) -> Tuple[Optional[date], Optional[date]]: +) -> Tuple[Optional[datetime.date], Optional[datetime.date]]: """Get the next start and end query dates for a court. Check the DB for the last date for a court that was completed. Return the @@ -64,15 +66,16 @@ def get_next_date_range( # Ensure that we go back five days from the last time we had success if # that success was in the last few days. last_complete_date = min( - now().date() - timedelta(days=5), last_completion_log.date_queried + now().date() - datetime.timedelta(days=5), + last_completion_log.date_queried, ) next_end_date = min( - now().date(), last_complete_date + timedelta(days=span) + now().date(), last_complete_date + datetime.timedelta(days=span) ) return last_complete_date, next_end_date -def mark_court_in_progress(court_id: str, d: date) -> QuerySet: +def mark_court_in_progress(court_id: str, d: datetime.date) -> QuerySet: log = PACERFreeDocumentLog.objects.create( status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, date_queried=d, @@ -81,6 +84,51 @@ def mark_court_in_progress(court_id: str, d: date) -> QuerySet: return log +def fetch_doc_report( + pacer_court_id: int, + start: Optional[datetime.date], + end: Optional[datetime.date], +): + exception_raised = False + status = PACERFreeDocumentLog.SCRAPE_FAILED + + logger.info( + "Attempting to get latest document references for " + "%s between %s and %s", + pacer_court_id, + start, + end, + ) + try: + status = get_and_save_free_document_report(pacer_court_id, start, end) + except ( + RequestException, + ReadTimeoutError, + IndexError, + TypeError, + PacerLoginException, + ValueError, + ) as exc: + if isinstance(exc, (RequestException, ReadTimeoutError)): + reason = "network error." + elif isinstance(exc, IndexError): + reason = "PACER 6.3 bug." + elif isinstance(exc, (TypeError, ValueError)): + reason = "failing PACER website." + elif isinstance(exc, PacerLoginException): + reason = "PACER login issue." + else: + reason = "unknown reason." + logger.error( + "Failed to get free document references for " + f"{pacer_court_id} between {start} and " + f"{end} due to {reason}." + ) + exception_raised = True + + return exception_raised, status + + def get_and_save_free_document_reports(options: OptionsType) -> None: """Query the Free Doc Reports on PACER and get a list of all the free documents. Do not download those items, as that step is done later. For now @@ -95,96 +143,100 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: done. """ # Kill any *old* logs that report they're in progress. (They've failed.) 
- three_hrs_ago = now() - timedelta(hours=3) + three_hrs_ago = now() - datetime.timedelta(hours=3) PACERFreeDocumentLog.objects.filter( date_started__lt=three_hrs_ago, status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, ).update(status=PACERFreeDocumentLog.SCRAPE_FAILED) - cl_court_ids = ( - Court.federal_courts.district_or_bankruptcy_pacer_courts() - .filter( - in_use=True, - end_date=None, + excluded_court_ids = ["casb", "gub", "ilnb", "innb", "miwb", "ohsb", "prb"] + + if options["courts"] != ["all"]: + cl_court_ids = ( + Court.federal_courts.district_or_bankruptcy_pacer_courts() + .filter( + in_use=True, + end_date=None, + pk__in=options["courts"], + ) + .exclude(pk__in=excluded_court_ids) + .values_list("pk", flat=True) ) - .exclude(pk__in=["casb", "gub", "ilnb", "innb", "miwb", "ohsb", "prb"]) - .values_list("pk", flat=True) - ) + else: + cl_court_ids = ( + Court.federal_courts.district_or_bankruptcy_pacer_courts() + .filter( + in_use=True, + end_date=None, + ) + .exclude(pk__in=excluded_court_ids) + .values_list("pk", flat=True) + ) + pacer_court_ids = [map_cl_to_pacer_id(v) for v in cl_court_ids] - today = now() - for pacer_court_id in pacer_court_ids: - while True: - next_start_d, next_end_d = get_next_date_range(pacer_court_id) - if next_end_d is None: - logger.warning( - f"Free opinion scraper for {pacer_court_id} still " - "in progress." - ) - break - logger.info( - "Attempting to get latest document references for " - "%s between %s and %s", - pacer_court_id, - next_start_d, - next_end_d, - ) - mark_court_in_progress(pacer_court_id, next_end_d) - try: - status = get_and_save_free_document_report( - pacer_court_id, next_start_d, next_end_d - ) - except ( - RequestException, - ReadTimeoutError, - IndexError, - TypeError, - PacerLoginException, - ValueError, - ) as exc: - if isinstance(exc, (RequestException, ReadTimeoutError)): - reason = "network error." - elif isinstance(exc, IndexError): - reason = "PACER 6.3 bug." - elif isinstance(exc, (TypeError, ValueError)): - reason = "failing PACER website." - elif isinstance(exc, PacerLoginException): - reason = "PACER login issue." - else: - reason = "unknown reason." - logger.error( - "Failed to get free document references for " - f"{pacer_court_id} between {next_start_d} and " - f"{next_end_d} due to {reason}." + if options["date_start"] and options["date_end"]: + date_ranges = make_date_range_tuples( + options["date_start"], options["date_end"], gap=options["span"] + ) + for pacer_court_id in pacer_court_ids: + for start, end in date_ranges: + exception_raised, status = fetch_doc_report( + pacer_court_id, start, end ) - mark_court_done_on_date( - PACERFreeDocumentLog.SCRAPE_FAILED, - pacer_court_id, - next_end_d, + if exception_raised: + break + + else: + today = now() + for pacer_court_id in pacer_court_ids: + while True: + next_start_d, next_end_d = get_next_date_range(pacer_court_id) + print( + f"next_start_d: {next_start_d} - next_end_d: {next_end_d}" ) - break + if next_end_d is None: + logger.warning( + f"Free opinion scraper for {pacer_court_id} still " + "in progress." 
+ ) + break + + mark_court_in_progress(pacer_court_id, next_end_d) - mark_court_done_on_date(status, pacer_court_id, next_end_d) + exc, status = fetch_doc_report( + pacer_court_id, next_start_d, next_end_d + ) + if exc: + mark_court_done_on_date( + PACERFreeDocumentLog.SCRAPE_FAILED, + pacer_court_id, + next_end_d, + ) + break - if status == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL: - if next_end_d >= today.date(): - logger.info( - "Got all document references for '%s'.", pacer_court_id + mark_court_done_on_date(status, pacer_court_id, next_end_d) + + if status == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL: + if next_end_d >= today.date(): + logger.info( + "Got all document references for '%s'.", + pacer_court_id, + ) + # Break from while loop, onwards to next court + break + else: + # More dates to do; let it continue + continue + + elif status == PACERFreeDocumentLog.SCRAPE_FAILED: + logger.error( + "Encountered critical error on %s " + "(network error?). Marking as failed and " + "pressing on." % pacer_court_id ) # Break from while loop, onwards to next court break - else: - # More dates to do; let it continue - continue - - elif status == PACERFreeDocumentLog.SCRAPE_FAILED: - logger.error( - "Encountered critical error on %s " - "(network error?). Marking as failed and " - "pressing on." % pacer_court_id - ) - # Break from while loop, onwards to next court - break def get_pdfs(options: OptionsType) -> None: @@ -202,7 +254,18 @@ def get_pdfs(options: OptionsType) -> None: q = cast(str, options["queue"]) index = options["index"] cnt = CaseNameTweaker() - rows = PACERFreeDocumentRow.objects.filter(error_msg="").only("pk") + rows = PACERFreeDocumentRow.objects.filter(error_msg="") + + if options["courts"] != ["all"]: + rows = rows.filter(court_id__in=options["courts"]) + + if options["date_start"] and options["date_end"]: + rows = rows.filter( + date_filed__gte=options["date_start"], + date_filed__lte=options["date_end"], + ) + + rows = rows.only("pk") count = rows.count() task_name = "downloading" if index: @@ -297,9 +360,42 @@ def add_arguments(self, parser: argparse.ArgumentParser) -> None: default=False, help="Do we index as we go, or leave that to be done later?", ) + parser.add_argument( + "--courts", + type=str, + default=["all"], + nargs="*", + help="The courts that you wish to parse.", + ) + parser.add_argument( + "--date-start", + dest="date_start", + required=False, + type=valid_date, + help="Date when the query should start.", + ) + parser.add_argument( + "--date-end", + dest="date_end", + required=False, + type=valid_date, + help="Date when the query should end.", + ) + parser.add_argument( + "--span", + type=int, + default=7, + help="The number of days, inclusive, that a query should span at a time.", + ) def handle(self, *args: List[str], **options: OptionsType) -> None: super().handle(*args, **options) + + if options["date_start"] and options["date_end"]: + if options["date_start"] > options["date_end"]: # type: ignore + print("Error: date-end must be greater than date-start.") + return + action = cast(Callable, options["action"]) action(options) From 51c59b59926b8c4737a2f1045b1385f0b7f451bf Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Tue, 9 Jul 2024 20:25:35 -0500 Subject: [PATCH 037/372] feat(logging): enable juriscraper loggers on console handler Solves #4188 Most juriscraper files use a logger called "Logger", whose specific name must be added to the `loggers` dict for it to be used. 
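A minimal sketch of the name-resolution behavior this relies on (commentary
only, not part of the change; the module path below is illustrative):

    import logging

    # Config entries are matched by the exact name passed to getLogger(),
    # so a module doing logging.getLogger("Logger") is only covered by a
    # literal "Logger" entry in the loggers dict...
    flat = logging.getLogger("Logger")

    # ...while a logger named after its module path propagates up the
    # dotted hierarchy and is covered by the "juriscraper" parent entry.
    scoped = logging.getLogger("juriscraper.opinions.united_states")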
Adding "juriscraper" module level logger in case other loggers are defined --- cl/settings/project/logging.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cl/settings/project/logging.py b/cl/settings/project/logging.py index 226cecbee4..0b1a793246 100644 --- a/cl/settings/project/logging.py +++ b/cl/settings/project/logging.py @@ -78,6 +78,16 @@ def skip_unreadable_post(record): }, # This is the one that's used practically everywhere in the code. "cl": {"handlers": ["console"], "level": "INFO", "propagate": True}, + "juriscraper": { + "handlers": ["console"], + "propagate": True, + "level": "DEBUG", + }, + "Logger": { + "handlers": ["console"], + "propagate": True, + "level": "DEBUG", + }, }, } From 683a79726b47e131a1885d2aa4c94464e8ea13b2 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 10 Jul 2024 15:34:37 -0600 Subject: [PATCH 038/372] feat(scrape_pacer_free_opinions): save pacer html files update task to update court log status --- .../commands/scrape_pacer_free_opinions.py | 84 ++++++++++++------- cl/corpus_importer/tasks.py | 48 +++++++---- cl/recap/models.py | 2 + 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index d42d211e0f..1b40006ab2 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -5,9 +5,7 @@ from celery.canvas import chain from django.conf import settings -from django.db.models import QuerySet from django.utils.timezone import now -from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.exceptions import PacerLoginException from juriscraper.lib.string_utils import CaseNameTweaker from requests import RequestException @@ -75,7 +73,9 @@ def get_next_date_range( return last_complete_date, next_end_date -def mark_court_in_progress(court_id: str, d: datetime.date) -> QuerySet: +def mark_court_in_progress( + court_id: str, d: datetime.date +) -> PACERFreeDocumentLog: log = PACERFreeDocumentLog.objects.create( status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, date_queried=d, @@ -88,9 +88,11 @@ def fetch_doc_report( pacer_court_id: int, start: Optional[datetime.date], end: Optional[datetime.date], + log_id: int = 0, ): exception_raised = False status = PACERFreeDocumentLog.SCRAPE_FAILED + rows_to_create = 0 logger.info( "Attempting to get latest document references for " @@ -100,7 +102,7 @@ def fetch_doc_report( end, ) try: - status = get_and_save_free_document_report(pacer_court_id, start, end) + status, rows_to_create = get_and_save_free_document_report(pacer_court_id, start, end, log_id) # type: ignore except ( RequestException, ReadTimeoutError, @@ -122,10 +124,19 @@ def fetch_doc_report( logger.error( "Failed to get free document references for " f"{pacer_court_id} between {start} and " - f"{end} due to {reason}." 
+ f"{end} due to {reason}.", + exc_info=True, ) exception_raised = True + logger.info( + "Got %s document references for " "%s between %s and %s", + rows_to_create, + pacer_court_id, + start, + end, + ) + return exception_raised, status @@ -176,25 +187,19 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: pacer_court_ids = [map_cl_to_pacer_id(v) for v in cl_court_ids] if options["date_start"] and options["date_end"]: - date_ranges = make_date_range_tuples( - options["date_start"], options["date_end"], gap=options["span"] - ) for pacer_court_id in pacer_court_ids: - for start, end in date_ranges: - exception_raised, status = fetch_doc_report( - pacer_court_id, start, end - ) - if exception_raised: - break - + # Here we do not save the log since if an incorrect range is entered + # the next time the daily cron is executed the command could skip days + exc, status = fetch_doc_report( + pacer_court_id, options["date_start"], options["date_end"] # type: ignore + ) + if exc: + break else: today = now() for pacer_court_id in pacer_court_ids: while True: next_start_d, next_end_d = get_next_date_range(pacer_court_id) - print( - f"next_start_d: {next_start_d} - next_end_d: {next_end_d}" - ) if next_end_d is None: logger.warning( f"Free opinion scraper for {pacer_court_id} still " @@ -202,20 +207,21 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: ) break - mark_court_in_progress(pacer_court_id, next_end_d) + log = mark_court_in_progress(pacer_court_id, next_end_d) exc, status = fetch_doc_report( - pacer_court_id, next_start_d, next_end_d + pacer_court_id, next_start_d, next_end_d, log.pk ) if exc: + # Something failed mark_court_done_on_date( + log.pk, PACERFreeDocumentLog.SCRAPE_FAILED, - pacer_court_id, - next_end_d, ) break - mark_court_done_on_date(status, pacer_court_id, next_end_d) + # Scrape successful + mark_court_done_on_date(log.pk, status) if status == PACERFreeDocumentLog.SCRAPE_SUCCESSFUL: if next_end_d >= today.date(): @@ -233,7 +239,8 @@ def get_and_save_free_document_reports(options: OptionsType) -> None: logger.error( "Encountered critical error on %s " "(network error?). Marking as failed and " - "pressing on." % pacer_court_id + "pressing on." % pacer_court_id, + exc_info=True, ) # Break from while loop, onwards to next court break @@ -319,6 +326,21 @@ def ocr_available(options: OptionsType) -> None: logger.info(f"Sent {i + 1}/{count} tasks to celery so far.") +def do_monthly(): + # Run everything monthly range + pass + + +def do_weekly(): + # Run everything weekly range + pass + + +def do_all(): + # run all courts since first day started to query each court + pass + + def do_everything(options: OptionsType): logger.info("Running and compiling free document reports.") get_and_save_free_document_reports(options) @@ -381,21 +403,16 @@ def add_arguments(self, parser: argparse.ArgumentParser) -> None: type=valid_date, help="Date when the query should end.", ) - parser.add_argument( - "--span", - type=int, - default=7, - help="The number of days, inclusive, that a query should span at a time.", - ) def handle(self, *args: List[str], **options: OptionsType) -> None: super().handle(*args, **options) if options["date_start"] and options["date_end"]: if options["date_start"] > options["date_end"]: # type: ignore - print("Error: date-end must be greater than date-start.") + print( + "Error: date-end must be greater or equal than date-start option." 
+ ) return - action = cast(Callable, options["action"]) action(options) @@ -404,4 +421,7 @@ def handle(self, *args: List[str], **options: OptionsType) -> None: "get-report-results": get_and_save_free_document_reports, "get-pdfs": get_pdfs, "ocr-available": ocr_available, + "do-monthly": do_monthly, + "do-weekly": do_weekly, + "do-all": do_all, } diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 09b2a10526..697050fb99 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -322,17 +322,15 @@ def download_recap_item( soft_time_limit=240, ) def get_and_save_free_document_report( - self: Task, - court_id: str, - start: date, - end: date, -) -> int: + self: Task, court_id: str, start: date, end: date, log_id: int = 0 +) -> Tuple[int, int]: """Download the Free document report and save it to the DB. :param self: The Celery task. :param court_id: A pacer court id. :param start: a date object representing the first day to get results. :param end: a date object representing the last day to get results. + :param log_id: a PACERFreeDocumentLog object id :return: The status code of the scrape """ cookies = get_or_cache_pacer_cookies( @@ -397,6 +395,34 @@ def get_and_save_free_document_report( return PACERFreeDocumentLog.SCRAPE_FAILED raise self.retry(exc=exc, countdown=5) + if log_id: + # We only save the html when the script is run automatically every day + log = PACERFreeDocumentLog.objects.get(pk=log_id) + for result in report.responses: + if isinstance(result, dict): + response = result.get("response") + query_start = result.get("start") + query_end = result.get("end") + + if response and query_start and query_end: + pacer_file = PacerHtmlFiles( + content_object=log, + upload_type=UPLOAD_TYPE.FREE_OPINIONS_REPORT, + ) + pacer_file.filepath.save( + f"free_opinions_report_{court_id}_from_{query_start.replace('/', '-')}_to_{query_end.replace('/', '-')}.html", + ContentFile(response.text.encode()), + ) + else: + # FreeOpinionReport now returns a list of dicts with additional data + # instead of a list of requests responses. + # This is temporary while the new version of juriscraper is added to + # courtlistener + logger.info( + "New version of juriscraper not yet implemented. Can't " + "save PacerHtmlFiles object." 
+ ) + document_rows_to_create = [] for row in results: document_row = PACERFreeDocumentRow( @@ -417,7 +443,7 @@ def get_and_save_free_document_report( # Create PACERFreeDocumentRow in bulk PACERFreeDocumentRow.objects.bulk_create(document_rows_to_create) - return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL + return PACERFreeDocumentLog.SCRAPE_SUCCESSFUL, len(document_rows_to_create) @app.task(bind=True, max_retries=5, ignore_result=True) @@ -866,18 +892,12 @@ def upload_to_ia( @app.task -def mark_court_done_on_date( - status: int, court_id: str, d: date -) -> Optional[int]: - court_id = map_pacer_to_cl_id(court_id) +def mark_court_done_on_date(log_id: int, status: int) -> Optional[int]: try: - doc_log = PACERFreeDocumentLog.objects.filter( - status=PACERFreeDocumentLog.SCRAPE_IN_PROGRESS, court_id=court_id - ).latest("date_queried") + doc_log = PACERFreeDocumentLog.objects.get(pk=log_id) except PACERFreeDocumentLog.DoesNotExist: return None else: - doc_log.date_queried = d doc_log.status = status doc_log.date_completed = now() doc_log.save() diff --git a/cl/recap/models.py b/cl/recap/models.py index f1183f28a9..6c5ec7055d 100644 --- a/cl/recap/models.py +++ b/cl/recap/models.py @@ -29,6 +29,7 @@ class UPLOAD_TYPE: CASE_QUERY_RESULT_PAGE = 14 APPELLATE_CASE_QUERY_RESULT_PAGE = 15 ACMS_DOCKET_JSON = 16 + FREE_OPINIONS_REPORT = 17 NAMES = ( (DOCKET, "HTML Docket"), (ATTACHMENT_PAGE, "HTML attachment page"), @@ -46,6 +47,7 @@ class UPLOAD_TYPE: (CASE_QUERY_RESULT_PAGE, "Case query result page"), (APPELLATE_CASE_QUERY_RESULT_PAGE, "Appellate Case query result page"), (ACMS_DOCKET_JSON, "ACMS docket JSON object"), + (FREE_OPINIONS_REPORT, "Free opinions report"), ) From 97ce67d0e03920f8197ccd5f8ffafd5be6f538e7 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 10 Jul 2024 15:47:46 -0600 Subject: [PATCH 039/372] feat(scrape_pacer_free_opinions): migrations for new upload type --- ...4_alter_pacerhtmlfiles_upload_type_noop.py | 64 +++++++++++++++++++ ..._alter_pacerhtmlfiles_upload_type_noop.sql | 10 +++ 2 files changed, 74 insertions(+) create mode 100644 cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py create mode 100644 cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py new file mode 100644 index 0000000000..a6b0c7ac66 --- /dev/null +++ b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py @@ -0,0 +1,64 @@ +# Generated by Django 5.0.6 on 2024-07-10 21:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("recap", "0013_processingqueue_update"), + ] + + operations = [ + migrations.AlterField( + model_name="pacerhtmlfiles", + name="upload_type", + field=models.SmallIntegerField( + choices=[ + (1, "HTML Docket"), + (2, "HTML attachment page"), + (3, "PDF"), + (4, "Docket history report"), + (5, "Appellate HTML docket"), + (6, "Appellate HTML attachment page"), + (7, "Internet Archive XML docket"), + (8, "Case report (iquery.pl) page"), + (9, "Claims register page"), + (10, "Zip archive of RECAP Documents"), + (11, "Email in the SES storage format"), + (12, "Case query page"), + (13, "Appellate Case query page"), + (14, "Case query result page"), + (15, "Appellate Case query result page"), + (16, "ACMS docket JSON object"), + (17, "Free opinions report"), + ], + help_text="The type of object that is uploaded", + ), + ), + migrations.AlterField( + 
model_name="processingqueue", + name="upload_type", + field=models.SmallIntegerField( + choices=[ + (1, "HTML Docket"), + (2, "HTML attachment page"), + (3, "PDF"), + (4, "Docket history report"), + (5, "Appellate HTML docket"), + (6, "Appellate HTML attachment page"), + (7, "Internet Archive XML docket"), + (8, "Case report (iquery.pl) page"), + (9, "Claims register page"), + (10, "Zip archive of RECAP Documents"), + (11, "Email in the SES storage format"), + (12, "Case query page"), + (13, "Appellate Case query page"), + (14, "Case query result page"), + (15, "Appellate Case query result page"), + (16, "ACMS docket JSON object"), + (17, "Free opinions report"), + ], + help_text="The type of object that is uploaded", + ), + ), + ] diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql new file mode 100644 index 0000000000..b0d9d1e378 --- /dev/null +++ b/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql @@ -0,0 +1,10 @@ +BEGIN; +-- +-- Alter field upload_type on pacerhtmlfiles +-- +-- (no-op) +-- +-- Alter field upload_type on processingqueue +-- +-- (no-op) +COMMIT; From ce3bba145c5f4e6d97d8dadc5ae1e9640a7973e7 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 11:01:43 -0400 Subject: [PATCH 040/372] feat(setting): Adds new setting for managing a list of proxies --- cl/settings/project/security.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cl/settings/project/security.py b/cl/settings/project/security.py index 57a0ef19f6..a5af603077 100644 --- a/cl/settings/project/security.py +++ b/cl/settings/project/security.py @@ -16,6 +16,7 @@ EGRESS_PROXY_HOST = env( "EGRESS_PROXY_HOST", default="http://cl-webhook-sentry:9090" ) +EGRESS_PROXY_HOSTS: list[str] = env.list("EGRESS_PROXY_HOSTS", default=[]) SECURE_HSTS_SECONDS = 63_072_000 SECURE_HSTS_INCLUDE_SUBDOMAINS = True From f31ca5870cd1858343b7acee3aa63574bc9deca1 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 11:30:31 -0400 Subject: [PATCH 041/372] feat(lib): Adds proxy selection logic for the ProxyPacerSession class This commit updates the `ProxyPacerSession` class to enable selection of a proxy connection string from the application settings. --- cl/lib/pacer_session.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index 7c993556cd..6ff68f9174 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -1,4 +1,5 @@ import pickle +import random from typing import Union from urllib.parse import urlparse @@ -28,14 +29,37 @@ class ProxyPacerSession(PacerSession): """ def __init__( - self, cookies=None, username=None, password=None, client_code=None + self, + cookies=None, + username=None, + password=None, + client_code=None, + proxy=None, ): super().__init__(cookies, username, password, client_code) + self.proxy_address = proxy if proxy else self._pick_proxy_connection() self.proxies = { - "http": settings.EGRESS_PROXY_HOST, + "http": self.proxy_address, } self.headers["X-WhSentry-TLS"] = "true" + def _pick_proxy_connection(self) -> str: + """ + Picks a proxy connection string from available options. + + If the `settings.EGRESS_PROXY_HOSTS` list is empty, this function + returns the value from `settings.EGRESS_PROXY_HOST`. Otherwise, it + randomly chooses a string from the `settings.EGRESS_PROXY_HOSTS` list + and returns it. + + Returns: + str: The chosen proxy connection string. 
+ """ + if not settings.EGRESS_PROXY_HOSTS: + return settings.EGRESS_PROXY_HOST + + return random.choice(settings.EGRESS_PROXY_HOSTS) + def _change_protocol(self, url: str) -> str: """Converts a URL from HTTPS to HTTP protocol. From db2c3ca0b67267b0586f9177431153627ec616ff Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 12:07:45 -0400 Subject: [PATCH 042/372] feat(lib): Tweaks log_into_pacer to return cookie jar and proxy address This commit updates the `log_into_pacer` method to return a tuple containing the user's cookie and the proxy address used for login (if applicable). This improvement provides more context about the login session, facilitating further actions requiring both cookies and potential proxy information. --- cl/lib/pacer_session.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index 6ff68f9174..740745f134 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -99,13 +99,13 @@ def log_into_pacer( username: str, password: str, client_code: str | None = None, -) -> RequestsCookieJar: +) -> tuple[RequestsCookieJar, str]: """Log into PACER and return the cookie jar :param username: A PACER username :param password: A PACER password :param client_code: A PACER client_code - :return: Request.CookieJar + :return: A tuple containing the Request.CookieJar and the proxy address """ s = ProxyPacerSession( username=username, @@ -113,7 +113,7 @@ def log_into_pacer( client_code=client_code, ) s.login() - return s.cookies + return s.cookies, s.proxy_address def get_or_cache_pacer_cookies( From a99ac37ecd0858c52a4fc29a4f40278089d86b6f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 12:25:10 -0400 Subject: [PATCH 043/372] feat(lib): Updates get_or_cache_pacer_cookies to return tuples This commit tweaks the `get_or_cache_pacer_cookies` function to handle the new return format of `log_into_pacer`. It ensures compatibility with both the updated function and any existing cached data that might be in the old format. --- cl/lib/pacer_session.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index 740745f134..a7543ad7b4 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -122,7 +122,7 @@ def get_or_cache_pacer_cookies( password: str, client_code: str | None = None, refresh: bool = False, -) -> RequestsCookieJar: +) -> tuple[RequestsCookieJar, str]: """Get PACER cookies for a user or create and cache fresh ones For the PACER Fetch API, we store users' PACER cookies in Redis with a @@ -131,7 +131,7 @@ def get_or_cache_pacer_cookies( This function attempts to get cookies for a user from Redis. If it finds them, it returns them. If not, it attempts to log the user in and then - returns the fresh cookies (after caching them). + returns the fresh cookies and the proxy used to login(after caching them). :param user_pk: The PK of the user attempting to store their credentials. Needed to create the key in Redis. 
@@ -139,21 +139,27 @@ def get_or_cache_pacer_cookies( :param password: The PACER password of the user :param client_code: The PACER client code of the user :param refresh: If True, refresh the cookies even if they're already cached - :return: Cookies for the PACER user + :return: A tuple containing the Request.CookieJar and the proxy address """ r = get_redis_interface("CACHE", decode_responses=False) - cookies = get_pacer_cookie_from_cache(user_pk, r=r) + cookies_data = get_pacer_cookie_from_cache(user_pk, r=r) ttl_seconds = r.ttl(session_key % user_pk) - if cookies and ttl_seconds >= 300 and not refresh: + if cookies_data and ttl_seconds >= 300 and not refresh: # cookies were found in cache and ttl >= 5 minutes, return them - return cookies + if isinstance(cookies_data, tuple): + return cookies_data + return cookies_data, settings.EGRESS_PROXY_HOST # Unable to find cookies in cache, are about to expire or refresh needed # Login and cache new values. - cookies = log_into_pacer(username, password, client_code) + cookies, proxy = log_into_pacer(username, password, client_code) cookie_expiration = 60 * 60 - r.set(session_key % user_pk, pickle.dumps(cookies), ex=cookie_expiration) - return cookies + r.set( + session_key % user_pk, + pickle.dumps((cookies, proxy)), + ex=cookie_expiration, + ) + return cookies, proxy def get_pacer_cookie_from_cache( From 8b3417e0e47ff7cb8446d67a2655c7a326dd26d1 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 15:45:50 -0400 Subject: [PATCH 044/372] feat(corpus_importer): Updates tasks logic and signature for improved cookie handling - Updates tasks logic to accommodate the new format for PACER user cookies. - Updates function signature to reflect the change in cookie data format. --- cl/corpus_importer/bulk_utils.py | 8 +- cl/corpus_importer/task_canvases.py | 18 +-- cl/corpus_importer/tasks.py | 184 ++++++++++++++++------------ cl/corpus_importer/tests.py | 4 +- 4 files changed, 125 insertions(+), 89 deletions(-) diff --git a/cl/corpus_importer/bulk_utils.py b/cl/corpus_importer/bulk_utils.py index 730a98b61d..711f768366 100644 --- a/cl/corpus_importer/bulk_utils.py +++ b/cl/corpus_importer/bulk_utils.py @@ -94,11 +94,11 @@ def get_petitions( logger.info(f"Sent {i} tasks to celery so far.") logger.info("Doing row %s", i) throttle.maybe_wait() - + cookies_data = pacer_session.cookies, pacer_session.proxy_address chain( - get_pacer_doc_by_rd.s( - rd_pk, pacer_session.cookies, tag=tag_petitions - ).set(queue=q), + get_pacer_doc_by_rd.s(rd_pk, cookies_data, tag=tag_petitions).set( + queue=q + ), extract_recap_pdf.si(rd_pk).set(queue=q), add_items_to_solr.si([rd_pk], "search.RECAPDocument").set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/task_canvases.py b/cl/corpus_importer/task_canvases.py index 143c061417..579d22eacb 100644 --- a/cl/corpus_importer/task_canvases.py +++ b/cl/corpus_importer/task_canvases.py @@ -18,7 +18,9 @@ from cl.search.tasks import add_or_update_recap_docket -def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q): +def get_docket_and_claims( + docket_number, court, case_name, cookies_data, tags, q +): """Get the docket report, claims history report, and save it all to the DB and Solr """ @@ -27,13 +29,13 @@ def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q): pass_through=None, docket_number=docket_number, court_id=court, - cookies=cookies, + cookies_data=cookies_data, case_name=case_name, docket_number_letters="bk", ).set(queue=q), 
get_docket_by_pacer_case_id.s( court_id=court, - cookies=cookies, + cookies_data=cookies_data, tag_names=tags, **{ "show_parties_and_counsel": True, @@ -41,9 +43,9 @@ def get_docket_and_claims(docket_number, court, case_name, cookies, tags, q): "show_list_of_member_cases": False, } ).set(queue=q), - get_bankr_claims_registry.s(cookies=cookies, tag_names=tags).set( - queue=q - ), + get_bankr_claims_registry.s( + cookies_data=cookies_data, tag_names=tags + ).set(queue=q), add_or_update_recap_docket.s().set(queue=q), ).apply_async() @@ -72,7 +74,9 @@ def get_district_attachment_pages(options, rd_pks, tag_names, session): break throttle.maybe_wait() chain( - get_attachment_page_by_rd.s(rd_pk, session.cookies).set(queue=q), + get_attachment_page_by_rd.s( + rd_pk, (session.cookies, session.proxy_address) + ).set(queue=q), make_attachment_pq_object.s(rd_pk, recap_user.pk).set(queue=q), process_recap_attachment.s(tag_names=tag_names).set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 09b2a10526..982e097bf2 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -335,7 +335,7 @@ def get_and_save_free_document_report( :param end: a date object representing the last day to get results. :return: The status code of the scrape """ - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( "pacer_scraper", username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, @@ -344,6 +344,7 @@ def get_and_save_free_document_report( cookies=cookies, username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, + proxy=proxy_address, ) report = FreeOpinionReport(court_id, s) msg = "" @@ -605,14 +606,14 @@ def get_and_process_free_pdf( return None raise self.retry() - cookies = get_or_cache_pacer_cookies( + cookies_data = get_or_cache_pacer_cookies( "pacer_scraper", username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, ) try: r, r_msg = download_pacer_pdf_by_rd( - rd.pk, result.pacer_case_id, result.pacer_doc_id, cookies + rd.pk, result.pacer_case_id, result.pacer_doc_id, cookies_data ) except HTTPError as exc: if exc.response and exc.response.status_code in [ @@ -938,12 +939,12 @@ def get_pacer_case_id_and_title( pass_through: Any, docket_number: str, court_id: str, - cookies: Optional[RequestsCookieJar] = None, - user_pk: Optional[int] = None, - case_name: Optional[str] = None, - office_number: Optional[str] = None, - docket_number_letters: Optional[str] = None, -) -> Optional[TaskData]: + cookies_data: tuple[RequestsCookieJar, str] | None = None, + user_pk: int | None = None, + case_name: str | None = None, + office_number: str | None = None, + docket_number_letters: str | None = None, +) -> TaskData | None: """Get the pacer_case_id and title values for a district court docket. Use heuristics to disambiguate the results. @@ -959,8 +960,9 @@ def get_pacer_case_id_and_title( :param docket_number: The docket number to look up. This is a flexible field that accepts a variety of docket number styles. :param court_id: The CourtListener court ID for the docket number - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address (optional) + as a string. :param user_pk: The PK of a user making the request. This can be provided instead of the cookies parameter. 
If so, this will get the
    user's cookies from Redis instead of passing them in as an argument.
@@ -988,10 +990,22 @@ def get_pacer_case_id_and_title(
         docket_number,
         court_id,
     )
-    if not cookies:
-        # Get cookies from Redis if not provided
-        cookies = get_pacer_cookie_from_cache(user_pk)  # type: ignore
-    s = ProxyPacerSession(cookies=cookies)
+
+    if cookies_data:
+        cookies, proxy_address = cookies_data
+    elif user_pk:
+        cookies_from_cache = get_pacer_cookie_from_cache(user_pk)
+        if isinstance(cookies_from_cache, tuple):
+            cookies, proxy_address = cookies_from_cache
+        else:
+            cookies, proxy_address = cookies_from_cache, settings.EGRESS_PROXY_HOST
+    else:
+        # If the user_pk is unavailable, cookies cannot be retrieved from the
+        # cache.
+        self.request.chain = None
+        return None
+
+    s = ProxyPacerSession(cookies=cookies, proxy=proxy_address)
     report = PossibleCaseNumberApi(map_cl_to_pacer_id(court_id), s)
     msg = ""
     try:
@@ -1040,9 +1053,9 @@ def do_case_query_by_pacer_case_id(
     self: Task,
     data: TaskData,
     court_id: str,
-    cookies: RequestsCookieJar,
+    cookies_data: tuple[RequestsCookieJar, str],
     tag_names: List[str] | None = None,
-) -> Optional[TaskData]:
+) -> TaskData | None:
     """Run a case query (iquery.pl) query on a case and save the data

     :param self: The celery task
     saving it in the DB.
     :return: A dict with the pacer_case_id and docket_pk values.
     """
-    s = ProxyPacerSession(cookies=cookies)
+    cookies, proxy_address = cookies_data
+    s = ProxyPacerSession(cookies=cookies, proxy=proxy_address)
     if data is None:
         logger.info("Empty data argument. Terminating chains and exiting.")
         self.request.chain = None
@@ -1165,7 +1179,7 @@ def query_case_query_report(

     :return: The report.data.
     """
-    cookies = get_or_cache_pacer_cookies(
+    cookies, proxy_address = get_or_cache_pacer_cookies(
         "pacer_scraper",
         settings.PACER_USERNAME,
         password=settings.PACER_PASSWORD,
@@ -1174,6 +1188,7 @@ def query_case_query_report(
         cookies=cookies,
         username=settings.PACER_USERNAME,
         password=settings.PACER_PASSWORD,
+        proxy=proxy_address,
     )
     report = CaseQuery(map_cl_to_pacer_id(court_id), s)
     report.query(pacer_case_id)
@@ -1481,11 +1496,11 @@ def get_docket_by_pacer_case_id(
     self: Task,
     data: TaskData,
     court_id: str,
-    cookies: Optional[RequestsCookieJar] = None,
+    cookies_data: tuple[RequestsCookieJar, str],
     docket_pk: Optional[int] = None,
     tag_names: Optional[str] = None,
     **kwargs,
-) -> Optional[TaskData]:
+) -> TaskData | None:
     """Get a docket by PACER case id, CL court ID, and a collection of
     kwargs that can be passed to the DocketReport query.

     :param self: The celery task
     :param data: A dict containing:
         Required: 'pacer_case_id': The internal case ID of the item in PACER.
         Optional: 'docket_pk': The ID of the docket to work on to avoid
         lookups if it's known in advance.
     :param court_id: A courtlistener court ID.
-    :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a
-    logged-in PACER user.
+    :param cookies_data: A tuple containing the PACER user's cookies
+    (`requests.cookies.RequestsCookieJar`) and the proxy address used to login
+    as a string.
     :param docket_pk: The PK of the docket to update. Can also be provided
     in the data param, above.
:param tag_names: A list of tag names that should be stored with the item @@ -1532,7 +1548,8 @@ def get_docket_by_pacer_case_id( logging_id = f"{court_id}.{pacer_case_id}" logger.info("Querying docket report %s", logging_id) - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = DocketReport(map_cl_to_pacer_id(court_id), s) try: report.query(pacer_case_id, **kwargs) @@ -1583,7 +1600,7 @@ def get_appellate_docket_by_docket_number( self: Task, docket_number: str, court_id: str, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], tag_names: Optional[List[str]] = None, **kwargs, ) -> Optional[TaskData]: @@ -1595,13 +1612,15 @@ def get_appellate_docket_by_docket_number( :param self: The celery task :param docket_number: The docket number of the case. :param court_id: A courtlistener/PACER appellate court ID. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param tag_names: The tag name that should be stored with the item in the DB, if desired. :param kwargs: A variety of keyword args to pass to DocketReport.query(). """ - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = AppellateDocketReport(court_id, s) logging_id = f"{court_id} - {docket_number}" logger.info("Querying docket report %s", logging_id) @@ -1651,20 +1670,21 @@ def get_appellate_docket_by_docket_number( def get_att_report_by_rd( rd: RECAPDocument, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], ) -> Optional[AttachmentPage]: """Method to get the attachment report for the item in PACER. :param rd: The RECAPDocument object to use as a source. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-on PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :return: The attachment report populated with the results """ - if not rd.pacer_doc_id: return None - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) pacer_court_id = map_cl_to_pacer_id(rd.docket_entry.docket.court_id) att_report = AttachmentPage(pacer_court_id, s) att_report.query(rd.pacer_doc_id) @@ -1682,14 +1702,15 @@ def get_att_report_by_rd( def get_attachment_page_by_rd( self: Task, rd_pk: int, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], ) -> Optional[AttachmentPage]: """Get the attachment page for the item in PACER. :param self: The celery task :param rd_pk: The PK of a RECAPDocument object to use as a source. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-on PACER user. + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. 
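    A minimal sketch of the expected shape, assuming a logged-in
    ProxyPacerSession named `session`:

        cookies_data = (session.cookies, session.proxy_address)
        jar, proxy = cookies_data  # RequestsCookieJar, e.g. "http://proxy_1:9090"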
:return: The attachment report populated with the results """ rd = RECAPDocument.objects.get(pk=rd_pk) @@ -1698,7 +1719,7 @@ def get_attachment_page_by_rd( self.request.chain = None return None try: - att_report = get_att_report_by_rd(rd, cookies) + att_report = get_att_report_by_rd(rd, cookies_data) except HTTPError as exc: if exc.response and exc.response.status_code in [ HTTPStatus.INTERNAL_SERVER_ERROR, @@ -1736,21 +1757,23 @@ def get_attachment_page_by_rd( def get_bankr_claims_registry( self: Task, data: TaskData, - cookies: RequestsCookieJar, - tag_names: Optional[List[str]] = None, -) -> Optional[TaskData]: + cookies_data: tuple[RequestsCookieJar, str], + tag_names: List[str] | None = None, +) -> TaskData | None: """Get the bankruptcy claims registry for a docket :param self: The celery task :param data: A dict of data containing, primarily, a key to 'docket_pk' for the docket for which we want to get the registry. Other keys will be ignored. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param tag_names: A list of tag names that should be stored with the claims registry information in the DB. """ - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) if data is None or data.get("docket_pk") is None: logger.warning( "Empty data argument or parameter. Terminating chains " @@ -1848,8 +1871,8 @@ def download_pacer_pdf_by_rd( rd_pk: int, pacer_case_id: str, pacer_doc_id: int, - cookies: RequestsCookieJar, - magic_number: Optional[str] = None, + cookies_data: tuple[RequestsCookieJar, str], + magic_number: str | None = None, ) -> tuple[Response | None, str]: """Using a RECAPDocument object ID, download the PDF if it doesn't already exist. @@ -1857,18 +1880,19 @@ def download_pacer_pdf_by_rd( :param rd_pk: The PK of the RECAPDocument to download :param pacer_case_id: The internal PACER case ID number :param pacer_doc_id: The internal PACER document ID to download - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: A tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param magic_number: The magic number to fetch PACER documents for free this is an optional field, only used by RECAP Email documents :return: A two-tuple of requests.Response object usually containing a PDF, or None if that wasn't possible, and a string representing the error if there was one. """ - + cookies, proxy_address = cookies_data rd = RECAPDocument.objects.get(pk=rd_pk) pacer_court_id = map_cl_to_pacer_id(rd.docket_entry.docket.court_id) - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = FreeOpinionReport(pacer_court_id, s) r, r_msg = report.download_pdf(pacer_case_id, pacer_doc_id, magic_number) @@ -1880,7 +1904,7 @@ def download_pdf_by_magic_number( court_id: str, pacer_doc_id: str, pacer_case_id: str, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], magic_number: str, appellate: bool = False, ) -> tuple[Response | None, str]: @@ -1889,15 +1913,17 @@ def download_pdf_by_magic_number( :param court_id: A CourtListener court ID to query the free document. 
:param pacer_doc_id: The pacer_doc_id to query the free document. :param pacer_case_id: The pacer_case_id to query the free document. - :param cookies: The cookies of a logged in PACER session + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param magic_number: The magic number to fetch PACER documents for free. :param appellate: Whether the download belongs to an appellate court. :return: A two-tuple of requests.Response object usually containing a PDF, or None if that wasn't possible, and a string representing the error if there was one. """ - - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = FreeOpinionReport(court_id, s) r, r_msg = report.download_pdf( pacer_case_id, pacer_doc_id, magic_number, appellate @@ -1916,10 +1942,10 @@ def get_document_number_from_confirmation_page( """ recap_email_user = User.objects.get(username="recap-email") - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( recap_email_user.pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) doc_num_report = DownloadConfirmationPage(court_id, s) doc_num_report.query(pacer_doc_id) data = doc_num_report.data @@ -1990,11 +2016,10 @@ def is_pacer_doc_sealed(court_id: str, pacer_doc_id: str) -> bool: """ recap_email_user = User.objects.get(username="recap-email") - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( recap_email_user.pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) - - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) receipt_report = DownloadConfirmationPage(court_id, s) receipt_report.query(pacer_doc_id) data = receipt_report.data @@ -2021,11 +2046,11 @@ def is_docket_entry_sealed( return False recap_email_user = User.objects.get(username="recap-email") - cookies = get_or_cache_pacer_cookies( + cookies, proxy_address = get_or_cache_pacer_cookies( recap_email_user.pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) - s = ProxyPacerSession(cookies=cookies) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = BaseReport(court_id, s) return report.is_entry_sealed(case_id, doc_id) @@ -2128,14 +2153,16 @@ def add_tags(rd: RECAPDocument, tag_name: Optional[str]) -> None: def get_pacer_doc_by_rd( self: Task, rd_pk: int, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], tag: Optional[str] = None, ) -> Optional[int]: """A simple method for getting the PDF associated with a RECAPDocument. :param self: The bound celery task :param rd_pk: The PK for the RECAPDocument object - :param cookies: The cookies of a logged in PACER session + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. 
:param tag: The name of a tag to apply to any modified items :return: The RECAPDocument PK """ @@ -2148,7 +2175,7 @@ def get_pacer_doc_by_rd( pacer_case_id = rd.docket_entry.docket.pacer_case_id r, r_msg = download_pacer_pdf_by_rd( - rd.pk, pacer_case_id, rd.pacer_doc_id, cookies + rd.pk, pacer_case_id, rd.pacer_doc_id, cookies_data ) court_id = rd.docket_entry.docket.court_id @@ -2186,7 +2213,7 @@ def get_pacer_doc_by_rd_and_description( self: Task, rd_pk: int, description_re: Pattern, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], fallback_to_main_doc: bool = False, tag_name: Optional[List[str]] = None, ) -> None: @@ -2200,15 +2227,16 @@ def get_pacer_doc_by_rd_and_description( :param rd_pk: The PK of a RECAPDocument object to use as a source. :param description_re: A compiled regular expression to search against the description provided by the attachment page. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param fallback_to_main_doc: Should we grab the main doc if none of the attachments match the regex? :param tag_name: A tag name to apply to any downloaded content. :return: None """ rd = RECAPDocument.objects.get(pk=rd_pk) - att_report = get_attachment_page_by_rd(self, rd_pk, cookies) + att_report = get_attachment_page_by_rd(self, rd_pk, cookies_data) att_found = None for attachment in att_report.data.get("attachments", []): @@ -2257,7 +2285,7 @@ def get_pacer_doc_by_rd_and_description( pacer_case_id = rd.docket_entry.docket.pacer_case_id r, r_msg = download_pacer_pdf_by_rd( - rd.pk, pacer_case_id, att_found["pacer_doc_id"], cookies + rd.pk, pacer_case_id, att_found["pacer_doc_id"], cookies_data ) court_id = rd.docket_entry.docket.court_id @@ -2295,18 +2323,20 @@ def get_pacer_doc_by_rd_and_description( def get_pacer_doc_id_with_show_case_doc_url( self: Task, rd_pk: int, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], ) -> None: """use the show_case_doc URL to get pacer_doc_id values. :param self: The celery task :param rd_pk: The pk of the RECAPDocument you want to get. - :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a - logged-in PACER user. + :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. """ rd = RECAPDocument.objects.get(pk=rd_pk) d = rd.docket_entry.docket - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) pacer_court_id = map_cl_to_pacer_id(d.court_id) report = ShowCaseDocApi(pacer_court_id, s) last_try = self.request.retries == self.max_retries @@ -2396,7 +2426,7 @@ def make_list_of_creditors_key(court_id: str, d_number_file_name: str) -> str: @throttle_task("1/s", key="court_id") def query_and_save_list_of_creditors( self: Task, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], court_id: str, d_number_file_name: str, docket_number: str, @@ -2408,7 +2438,9 @@ def query_and_save_list_of_creditors( HTML and pipe-limited text files and convert them to CSVs. :param self: The celery task - :param cookies: The cookies for the current PACER session. 
+ :param cookies_data: tuple containing the PACER user's cookies + (`requests.cookies.RequestsCookieJar`) and the proxy address used to login + as a string. :param court_id: The court_id for the bankruptcy court. :param d_number_file_name: The docket number to use as file name. :param docket_number: The docket number of the case. @@ -2418,8 +2450,8 @@ def query_and_save_list_of_creditors( :return: None """ - - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = cookies_data + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) try: report = ListOfCreditors(court_id, s) except AssertionError: diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index fc745568d4..6291121f39 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -497,7 +497,7 @@ def test_party_parsing(self) -> None: @patch( "cl.corpus_importer.tasks.get_or_cache_pacer_cookies", - return_value=None, + return_value=(None, None), ) def test_get_and_save_free_document_report(self, mock_cookies) -> None: """Test the retrieval and storage of free document report data.""" @@ -3342,7 +3342,7 @@ def test_merger(self): @patch( "cl.corpus_importer.tasks.get_or_cache_pacer_cookies", - return_value=None, + return_value=(None, None), ) @override_settings( IQUERY_PROBE_DAEMON_ENABLED=True, From 588818d389295a47865f8ecea73e43749a40a4a6 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 15:47:09 -0400 Subject: [PATCH 045/372] feat(recap): Tweaks tasks to handle the new format for user cookies. --- cl/recap/tasks.py | 49 +++++++++++++++++++++++++++++++++++------------ cl/recap/tests.py | 8 ++++---- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/cl/recap/tasks.py b/cl/recap/tasks.py index c592024d98..6f32dbed06 100644 --- a/cl/recap/tasks.py +++ b/cl/recap/tasks.py @@ -1558,13 +1558,18 @@ def fetch_pacer_doc_by_rd( self.request.chain = None return + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) pacer_case_id = rd.docket_entry.docket.pacer_case_id try: r, r_msg = download_pacer_pdf_by_rd( rd.pk, pacer_case_id, rd.pacer_doc_id, - cookies, + cookies_data, magic_number, ) except (requests.RequestException, HTTPError): @@ -1656,8 +1661,13 @@ def fetch_attachment_page(self: Task, fq_pk: int) -> None: mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED) return + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) try: - r = get_att_report_by_rd(rd, cookies) + r = get_att_report_by_rd(rd, cookies_data) except HTTPError as exc: msg = "Failed to get attachment page from network." 
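        # The `isinstance(cookies, tuple)` normalization above recurs in
        # several tasks touched by this patch. A small helper could collapse
        # the repetition; a sketch only, with a hypothetical name
        # (`to_cookies_data`) that is not part of this series:
        #
        #   def to_cookies_data(cached):
        #       """Normalize cached cookie data to a (jar, proxy) tuple."""
        #       if isinstance(cached, tuple):
        #           return cached
        #       return cached, settings.EGRESS_PROXY_HOST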
if exc.response.status_code in [ @@ -1829,14 +1839,19 @@ def fetch_docket(self, fq_pk): async_to_sync(mark_pq_status)(fq, "", PROCESSING_STATUS.IN_PROGRESS) - cookies = get_pacer_cookie_from_cache(fq.user_id) - if cookies is None: + cookies_data = get_pacer_cookie_from_cache(fq.user_id) + if cookies_data is None: msg = f"Cookie cache expired before task could run for user: {fq.user_id}" mark_fq_status(fq, msg, PROCESSING_STATUS.FAILED) self.request.chain = None return None - s = ProxyPacerSession(cookies=cookies) + cookies, proxy_address = ( + cookies_data + if isinstance(cookies_data, tuple) + else (cookies_data, settings.EGRESS_PROXY_HOST) + ) + s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) try: result = fetch_pacer_case_id_and_title(s, fq, court_id) except (requests.RequestException, ReadTimeoutError) as exc: @@ -2075,7 +2090,7 @@ def save_pacer_doc_from_pq( def download_pacer_pdf_and_save_to_pq( court_id: str, - cookies: RequestsCookieJar, + cookies_data: tuple[RequestsCookieJar, str], cutoff_date: datetime, magic_number: str | None, pacer_case_id: str, @@ -2091,7 +2106,7 @@ def download_pacer_pdf_and_save_to_pq( PQ object. Increasing the reliability of saving PACER documents. :param court_id: A CourtListener court ID to query the free document. - :param cookies: The cookies of a logged in PACER session + :param cookies_data: The cookies of a logged in PACER session :param cutoff_date: The datetime from which we should query ProcessingQueue objects. For the main RECAPDocument the datetime the EmailProcessingQueue was created. For attachments the datetime the @@ -2128,7 +2143,7 @@ def download_pacer_pdf_and_save_to_pq( court_id, pacer_doc_id, pacer_case_id, - cookies, + cookies_data, magic_number, appellate, ) @@ -2175,13 +2190,18 @@ def get_and_copy_recap_attachment_docs( """ cookies = get_pacer_cookie_from_cache(user_pk) + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) appellate = False unique_pqs = [] for rd_att in att_rds: cutoff_date = rd_att.date_created pq = download_pacer_pdf_and_save_to_pq( court_id, - cookies, + cookies_data, cutoff_date, magic_number, pacer_case_id, @@ -2286,6 +2306,11 @@ def get_and_merge_rd_attachments( all_attachment_rds = [] cookies = get_pacer_cookie_from_cache(user_pk) + cookies_data = ( + cookies + if isinstance(cookies, tuple) + else (cookies, settings.EGRESS_PROXY_HOST) + ) # Try to get the attachment page without being logged into PACER att_report_text = get_attachment_page_by_url(document_url, court_id) if att_report_text: @@ -2297,7 +2322,7 @@ def get_and_merge_rd_attachments( .recap_documents.earliest("date_created") ) # Get the attachment page being logged into PACER - att_report = get_att_report_by_rd(main_rd, cookies) + att_report = get_att_report_by_rd(main_rd, cookies_data) for docket_entry in dockets_updated: # Merge the attachments for each docket/recap document @@ -2383,7 +2408,7 @@ def process_recap_email( start_time = now() # Ensures we have PACER cookies ready to go. - cookies = get_or_cache_pacer_cookies( + cookies_data = get_or_cache_pacer_cookies( user_pk, settings.PACER_USERNAME, settings.PACER_PASSWORD ) appellate = data["appellate"] @@ -2391,7 +2416,7 @@ def process_recap_email( # its future processing. 
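    # Unlike the raw cache reads handled above, get_or_cache_pacer_cookies()
    # always returns the new (cookie_jar, proxy) tuple, so no isinstance()
    # normalization is needed before passing cookies_data along.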
pq = download_pacer_pdf_and_save_to_pq( epq.court_id, - cookies, + cookies_data, epq.date_created, magic_number, pacer_case_id, diff --git a/cl/recap/tests.py b/cl/recap/tests.py index c01961b029..f04ee7e18e 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -1205,7 +1205,7 @@ async def test_missing_receipt_properties_fails(self): ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: None, + side_effect=lambda x, y, z: (None, None), ) @mock.patch( "cl.recap.tasks.is_docket_entry_sealed", @@ -2857,7 +2857,7 @@ def test_create_from_idb_chunk(self) -> None: ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: None, + side_effect=lambda x, y, z: (None, None), ) @mock.patch( "cl.recap.tasks.is_pacer_court_accessible", @@ -5315,7 +5315,7 @@ def test_clean_up_recap_document_file(self, mock_open): ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: "Cookie", + side_effect=lambda x, y, z: ("Cookie", settings.EGRESS_PROXY_HOST), ) @mock.patch( "cl.recap.tasks.get_pacer_cookie_from_cache", @@ -5689,7 +5689,7 @@ def test_is_pacer_court_accessible_fails( ) @mock.patch( "cl.recap.tasks.get_or_cache_pacer_cookies", - side_effect=lambda x, y, z: None, + side_effect=lambda x, y, z: (None, None), ) @mock.patch( "cl.recap.tasks.is_pacer_court_accessible", From 84894190c38e026545fcefe73bd1b82e421f03c5 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 15:51:49 -0400 Subject: [PATCH 046/372] feat(scrapers): Updates update_docket_info_iquery task - Adapts update_docket_info_iquery task to handle the updated output format of get_or_cache_pacer_cookies. - Enhances task functionality by passing the proxy parameter to the ProxyPacerSession class --- cl/scrapers/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cl/scrapers/tasks.py b/cl/scrapers/tasks.py index 504db45df9..d4a7fd2cc2 100644 --- a/cl/scrapers/tasks.py +++ b/cl/scrapers/tasks.py @@ -404,7 +404,7 @@ def update_docket_info_iquery(self, d_pk: int, court_id: str) -> None: :param court_id: The court of the docket. Needed for throttling by court. 
:return: None """ - cookies = get_or_cache_pacer_cookies( + cookies, proxy = get_or_cache_pacer_cookies( "pacer_scraper", settings.PACER_USERNAME, password=settings.PACER_PASSWORD, @@ -413,6 +413,7 @@ def update_docket_info_iquery(self, d_pk: int, court_id: str) -> None: cookies=cookies, username=settings.PACER_USERNAME, password=settings.PACER_PASSWORD, + proxy=proxy, ) d = Docket.objects.get(pk=d_pk, court_id=court_id) report = CaseQuery(map_cl_to_pacer_id(d.court_id), s) From b93fd90519c3423c59b4306541e07d97a14c1378 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 16:55:02 -0400 Subject: [PATCH 047/372] feat(corpus_importer): Updates commands to use the new cookie format --- .../management/commands/760_project.py | 6 +++--- .../management/commands/adelman_david.py | 8 ++++---- .../management/commands/buchwald_project.py | 4 ++-- .../management/commands/buried_alive_project.py | 5 ++++- .../management/commands/everything_project.py | 4 ++-- .../management/commands/export_control.py | 2 +- .../management/commands/import_patent.py | 6 +++--- .../management/commands/invoice_project.py | 14 ++++++++------ .../management/commands/jackson_project.py | 2 +- .../management/commands/kessler_ilnb.py | 14 +++++++++++--- .../management/commands/legal_robot.py | 6 +++--- .../commands/list_of_creditors_project.py | 2 +- cl/corpus_importer/management/commands/nos_700.py | 4 ++-- .../management/commands/nywb_chapter_7.py | 10 ++++++++-- .../management/commands/merge_idb_into_dockets.py | 2 +- 15 files changed, 54 insertions(+), 35 deletions(-) diff --git a/cl/corpus_importer/management/commands/760_project.py b/cl/corpus_importer/management/commands/760_project.py index b4a227f0aa..37c5785eef 100644 --- a/cl/corpus_importer/management/commands/760_project.py +++ b/cl/corpus_importer/management/commands/760_project.py @@ -55,7 +55,7 @@ def get_dockets(options): get_appellate_docket_by_docket_number.s( docket_number=row["Cleaned case_No"], court_id=row["fjc_court_id"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[TAG], **{ "show_docket_entries": True, @@ -75,12 +75,12 @@ def get_dockets(options): pass_through=None, docket_number=row["Cleaned case_No"], court_id=row["fjc_court_id"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), case_name=row["Title"], ).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row["fjc_court_id"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[TAG], **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/adelman_david.py b/cl/corpus_importer/management/commands/adelman_david.py index f24f58cae3..2844e4ff2b 100644 --- a/cl/corpus_importer/management/commands/adelman_david.py +++ b/cl/corpus_importer/management/commands/adelman_david.py @@ -48,7 +48,7 @@ def download_dockets(options): get_appellate_docket_by_docket_number.s( docket_number=row["docket_no1"], court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[PROJECT_TAG_NAME, row_tag], # Do not get the docket entries for now. We're only # interested in the date terminated. 
If it's an open case, @@ -71,17 +71,17 @@ def download_dockets(options): pass_through=None, docket_number=row["docket_no1"], court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), case_name=row["name"], ).set(queue=q), do_case_query_by_pacer_case_id.s( court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[PROJECT_TAG_NAME, row_tag], ).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row["cl_court"], - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=[PROJECT_TAG_NAME, row_tag], **{ # No docket entries diff --git a/cl/corpus_importer/management/commands/buchwald_project.py b/cl/corpus_importer/management/commands/buchwald_project.py index 7beb4865af..6b6dbba000 100644 --- a/cl/corpus_importer/management/commands/buchwald_project.py +++ b/cl/corpus_importer/management/commands/buchwald_project.py @@ -59,7 +59,7 @@ def add_all_nysd_to_cl(options): throttle.maybe_wait() logger.info("Doing pacer_case_id: %s", pacer_case_id) make_docket_by_iquery.apply_async( - args=("nysd", pacer_case_id, session.cookies, [NYSD_TAG]), + args=("nysd", pacer_case_id, "default", [NYSD_TAG]), queue=q, ) @@ -104,7 +104,7 @@ def get_dockets(options): get_docket_by_pacer_case_id.s( data={"pacer_case_id": d.pacer_case_id}, court_id=d.court_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), docket_pk=d.pk, tag_names=[BUCKWALD_TAG], **{ diff --git a/cl/corpus_importer/management/commands/buried_alive_project.py b/cl/corpus_importer/management/commands/buried_alive_project.py index 880176072e..f84ee6a16a 100644 --- a/cl/corpus_importer/management/commands/buried_alive_project.py +++ b/cl/corpus_importer/management/commands/buried_alive_project.py @@ -64,7 +64,10 @@ def get_pacer_dockets(options, docket_pks, tags): get_docket_by_pacer_case_id.s( {"pacer_case_id": d.pacer_case_id, "docket_pk": d.pk}, d.court_id, - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), tag_names=tags, **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/everything_project.py b/cl/corpus_importer/management/commands/everything_project.py index 3ea7d27eb2..a74b9328ff 100644 --- a/cl/corpus_importer/management/commands/everything_project.py +++ b/cl/corpus_importer/management/commands/everything_project.py @@ -141,13 +141,13 @@ def get_dockets(options, items, tags, sample_size=0, doc_num_end=""): pass_through=None, docket_number=row.docket_number, court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), filter_docket_by_tags.s(tags, row.district_id).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=tags, **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/export_control.py b/cl/corpus_importer/management/commands/export_control.py index da434bd83f..518f22a61c 100644 --- a/cl/corpus_importer/management/commands/export_control.py +++ b/cl/corpus_importer/management/commands/export_control.py @@ -85,7 +85,7 @@ def get_data(options, row_transform, tags): row["docket_number"], row["court"], row["case_name"], - session.cookies, + (session.cookies, session.proxy_address), tags, q, ) diff --git 
a/cl/corpus_importer/management/commands/import_patent.py b/cl/corpus_importer/management/commands/import_patent.py index f207f649ab..1762126457 100644 --- a/cl/corpus_importer/management/commands/import_patent.py +++ b/cl/corpus_importer/management/commands/import_patent.py @@ -78,12 +78,12 @@ def get_dockets(options: dict) -> None: pass_through=None, docket_number=item.docket_number, court_id=item.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), get_docket_by_pacer_case_id.s( court_id=item.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=PATENT_TAGS, **{ "show_parties_and_counsel": True, @@ -101,7 +101,7 @@ def get_dockets(options: dict) -> None: get_docket_by_pacer_case_id.s( data={"pacer_case_id": d.pacer_case_id}, court_id=d.court_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), docket_pk=d.pk, tag_names=PATENT_TAGS, **{ diff --git a/cl/corpus_importer/management/commands/invoice_project.py b/cl/corpus_importer/management/commands/invoice_project.py index 8f3f889c34..d0195491ea 100644 --- a/cl/corpus_importer/management/commands/invoice_project.py +++ b/cl/corpus_importer/management/commands/invoice_project.py @@ -83,9 +83,9 @@ def get_attachment_pages(options): throttle.maybe_wait() chain( # Query the attachment page and process it - get_attachment_page_by_rd.s(result["id"], session.cookies).set( - queue=q - ), + get_attachment_page_by_rd.s( + result["id"], (session.cookies, session.proxy_address) + ).set(queue=q), # Take that in a new task and make a PQ object make_attachment_pq_object.s(result["id"], recap_user.pk).set( queue=q @@ -150,9 +150,11 @@ def get_documents(options): continue chain( - get_pacer_doc_by_rd.s(rd.pk, session.cookies, tag=TAG_PHASE_2).set( - queue=q - ), + get_pacer_doc_by_rd.s( + rd.pk, + (session.cookies, session.proxy_address), + tag=TAG_PHASE_2, + ).set(queue=q), extract_recap_pdf.si(rd.pk).set(queue=q), add_items_to_solr.si([rd.pk], "search.RECAPDocument").set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/management/commands/jackson_project.py b/cl/corpus_importer/management/commands/jackson_project.py index 1e7fd98e3b..f4b420de3f 100644 --- a/cl/corpus_importer/management/commands/jackson_project.py +++ b/cl/corpus_importer/management/commands/jackson_project.py @@ -41,7 +41,7 @@ def get_dockets(options): get_docket_by_pacer_case_id.s( data={"pacer_case_id": d.pacer_case_id}, court_id=d.court_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), docket_pk=d.pk, tag_names=[JACKSON_TAG], **{ diff --git a/cl/corpus_importer/management/commands/kessler_ilnb.py b/cl/corpus_importer/management/commands/kessler_ilnb.py index a3ad701b23..d70df6e92e 100644 --- a/cl/corpus_importer/management/commands/kessler_ilnb.py +++ b/cl/corpus_importer/management/commands/kessler_ilnb.py @@ -60,13 +60,19 @@ def get_dockets(options): row["docket"], row["office"] ), court_id="ilnb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), office_number=row["office"], docket_number_letters="bk", ).set(queue=q), get_docket_by_pacer_case_id.s( court_id="ilnb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), tag_names=[TAG], **{ "show_parties_and_counsel": True, @@ -118,7 +124,9 @@ def get_final_docs(options): throttle.maybe_wait() chain( 
get_pacer_doc_by_rd.s( - rd_pk, pacer_session.cookies, tag=TAG_FINALS + rd_pk, + (pacer_session.cookies, pacer_session.proxy_address), + tag=TAG_FINALS, ).set(queue=q), extract_recap_pdf.si(rd_pk).set(queue=q), add_items_to_solr.si([rd_pk], "search.RECAPDocument").set( diff --git a/cl/corpus_importer/management/commands/legal_robot.py b/cl/corpus_importer/management/commands/legal_robot.py index d6bc38244f..f37685f470 100644 --- a/cl/corpus_importer/management/commands/legal_robot.py +++ b/cl/corpus_importer/management/commands/legal_robot.py @@ -79,9 +79,9 @@ def get_documents(options): continue chain( - get_pacer_doc_by_rd.s(rd.pk, session.cookies, tag=TAG).set( - queue=q - ), + get_pacer_doc_by_rd.s( + rd.pk, (session.cookies, session.proxy_address), tag=TAG + ).set(queue=q), extract_recap_pdf.si(rd.pk).set(queue=q), add_items_to_solr.si([rd.pk], "search.RECAPDocument").set(queue=q), ).apply_async() diff --git a/cl/corpus_importer/management/commands/list_of_creditors_project.py b/cl/corpus_importer/management/commands/list_of_creditors_project.py index 9783903212..83482110b7 100644 --- a/cl/corpus_importer/management/commands/list_of_creditors_project.py +++ b/cl/corpus_importer/management/commands/list_of_creditors_project.py @@ -139,7 +139,7 @@ def query_and_save_creditors_data(options: OptionsType) -> None: ) throttle.maybe_wait() query_and_save_list_of_creditors.si( - session.cookies, + (session.cookies, session.proxy_address), court_id, d_number_file_name, docket_number, diff --git a/cl/corpus_importer/management/commands/nos_700.py b/cl/corpus_importer/management/commands/nos_700.py index 915c030eef..600cf8188f 100644 --- a/cl/corpus_importer/management/commands/nos_700.py +++ b/cl/corpus_importer/management/commands/nos_700.py @@ -257,13 +257,13 @@ def get_dockets(options, items, tags, sample_size=0): pass_through=None, docket_number=row.docket_number, court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), filter_docket_by_tags.s(tags, row.district_id).set(queue=q), get_docket_by_pacer_case_id.s( court_id=row.district_id, - cookies=session.cookies, + cookies_data=(session.cookies, session.proxy_address), tag_names=tags, **{ "show_parties_and_counsel": True, diff --git a/cl/corpus_importer/management/commands/nywb_chapter_7.py b/cl/corpus_importer/management/commands/nywb_chapter_7.py index 7efa9888fa..d66dd7027a 100644 --- a/cl/corpus_importer/management/commands/nywb_chapter_7.py +++ b/cl/corpus_importer/management/commands/nywb_chapter_7.py @@ -55,13 +55,19 @@ def get_dockets(options): row["DOCKET"], row["OFFICE"] ), court_id="nywb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), office_number=row["OFFICE"], docket_number_letters="bk", ).set(queue=q), get_docket_by_pacer_case_id.s( court_id="nywb", - cookies=pacer_session.cookies, + cookies_data=( + pacer_session.cookies, + pacer_session.proxy_address, + ), tag_names=[TAG], **{ "doc_num_start": 1, diff --git a/cl/recap/management/commands/merge_idb_into_dockets.py b/cl/recap/management/commands/merge_idb_into_dockets.py index 0fe62e0c85..e8a741020d 100644 --- a/cl/recap/management/commands/merge_idb_into_dockets.py +++ b/cl/recap/management/commands/merge_idb_into_dockets.py @@ -142,7 +142,7 @@ def update_any_missing_pacer_case_ids(options): pass_through=d.pk, docket_number=d.idb_data.docket_number, court_id=d.idb_data.district_id, - cookies=session.cookies, + 
cookies_data=(session.cookies, session.proxy_address), **params, ).set(queue=q), update_docket_from_hidden_api.s().set(queue=q), From 693552db948b1a1e6971ade141d02bc53ae64315 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Wed, 10 Jul 2024 18:42:07 -0400 Subject: [PATCH 048/372] feat(lib): Adds tests for pacer session utils --- cl/lib/tests.py | 132 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/cl/lib/tests.py b/cl/lib/tests.py index 6507fab826..ff86a720e3 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -1,8 +1,13 @@ import datetime +import pickle from typing import Tuple, TypedDict, cast +from unittest.mock import patch from asgiref.sync import async_to_sync +from django.conf import settings from django.core.files.base import ContentFile +from django.test import override_settings +from requests.cookies import RequestsCookieJar from cl.lib.date_time import midnight_pt from cl.lib.elasticsearch_utils import append_query_conjunctions @@ -21,6 +26,11 @@ normalize_attorney_role, normalize_us_state, ) +from cl.lib.pacer_session import ( + ProxyPacerSession, + get_or_cache_pacer_cookies, + session_key, +) from cl.lib.privacy_tools import anonymize from cl.lib.ratelimiter import parse_rate from cl.lib.redis_utils import ( @@ -80,6 +90,128 @@ def test_auto_blocking_small_bankr_docket(self) -> None: ) +class TestPacerSessionUtils(TestCase): + + def setUp(self) -> None: + r = get_redis_interface("CACHE", decode_responses=False) + self.test_cookies = RequestsCookieJar() + self.test_cookies.set("PacerSession", "this-is-a-test") + r.set( + session_key % "test_user_old_format", + pickle.dumps(self.test_cookies), + ex=60 * 60, + ) + r.set( + session_key % "test_user_new_format", + pickle.dumps((self.test_cookies, "http://proxy_1:9090")), + ex=60 * 60, + ) + r.set( + session_key % "test_old_format_almost_expired", + pickle.dumps(self.test_cookies), + ex=60, + ) + r.set( + session_key % "test_new_format_almost_expired", + pickle.dumps((self.test_cookies, "http://proxy_1:9090")), + ex=60, + ) + + def test_use_default_proxy_if_list_not_available(self) -> None: + """Does ProxyPacerSession uses the default proxy when no list is provided?""" + session = ProxyPacerSession(username="test", password="password") + self.assertEqual(session.proxy_address, settings.EGRESS_PROXY_HOST) + + @override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] + ) + def test_pick_random_proxy_when_list_is_available(self): + """Does ProxyPacerSession choose a random proxy from the available list?""" + session = ProxyPacerSession(username="test", password="password") + self.assertNotEqual(session.proxy_address, settings.EGRESS_PROXY_HOST) + self.assertIn( + session.proxy_address, + ["http://proxy_1:9090", "http://proxy_2:9090"], + ) + + def test_use_default_proxy_host_for_old_cookie_format(self): + """Can we handle the old cookie format properly?""" + cookies_data = get_or_cache_pacer_cookies( + "test_user_old_format", username="test", password="password" + ) + self.assertIsInstance(cookies_data, tuple) + _, proxy = cookies_data + self.assertEqual(proxy, settings.EGRESS_PROXY_HOST) + + @override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] + ) + @patch("cl.lib.pacer_session.log_into_pacer") + def test_compute_new_cookies_with_new_format(self, mock_log_into_pacer): + """Are we using the tuple format for new cookies?""" + mock_log_into_pacer.return_value = ( + self.test_cookies, + "http://proxy_1:9090", + ) + 
cookies_data = get_or_cache_pacer_cookies( + "test_user_new_cookie", username="test", password="password" + ) + self.assertIsInstance(cookies_data, tuple) + _, proxy = cookies_data + self.assertEqual(proxy, "http://proxy_1:9090") + + def test_parse_cookie_proxy_pair_properly(self): + """Can we parse the tuple format from cache properly?""" + cookies_data = get_or_cache_pacer_cookies( + "test_user_new_format", username="test", password="password" + ) + self.assertIsInstance(cookies_data, tuple) + _, proxy = cookies_data + self.assertEqual(proxy, "http://proxy_1:9090") + + @override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] + ) + @patch("cl.lib.pacer_session.log_into_pacer") + def test_compute_cookies_for_almost_expired_data( + self, mock_log_into_pacer + ): + """Are we using the tuple format when re-computing session?""" + mock_log_into_pacer.return_value = ( + self.test_cookies, + "http://proxy_1:9090", + ) + + # Attempts to get almost expired cookies with the old format from cache + # Expects refresh. + cookies = get_or_cache_pacer_cookies( + "test_old_format_almost_expired", + username="test", + password="password", + ) + self.assertIsInstance(cookies, tuple) + _, proxy = cookies + self.assertEqual(mock_log_into_pacer.call_count, 1) + self.assertEqual(proxy, "http://proxy_1:9090") + + mock_log_into_pacer.return_value = ( + self.test_cookies, + "http://proxy_2:9090", + ) + + # Attempts to get almost expired cookies with the new format from cache + # Expects refresh. + cookies = get_or_cache_pacer_cookies( + "test_new_format_almost_expired", + username="test", + password="password", + ) + self.assertIsInstance(cookies, tuple) + _, proxy = cookies + self.assertEqual(mock_log_into_pacer.call_count, 2) + self.assertEqual(proxy, "http://proxy_2:9090") + + class TestStringUtils(SimpleTestCase): def test_trunc(self) -> None: """Does trunc give us the results we expect?""" From 5a47599f92b6437929d735197ab2f24492331e9f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 12:20:15 -0400 Subject: [PATCH 049/372] feat(pacer_session): Updates logic to pick a proxy connection str --- cl/lib/pacer_session.py | 9 ++------- cl/lib/tests.py | 17 +++-------------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/cl/lib/pacer_session.py b/cl/lib/pacer_session.py index a7543ad7b4..2fa6094060 100644 --- a/cl/lib/pacer_session.py +++ b/cl/lib/pacer_session.py @@ -47,17 +47,12 @@ def _pick_proxy_connection(self) -> str: """ Picks a proxy connection string from available options. - If the `settings.EGRESS_PROXY_HOSTS` list is empty, this function - returns the value from `settings.EGRESS_PROXY_HOST`. Otherwise, it - randomly chooses a string from the `settings.EGRESS_PROXY_HOSTS` list - and returns it. + this function randomly chooses a string from the + `settings.EGRESS_PROXY_HOSTS` list and returns it. Returns: str: The chosen proxy connection string. 
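        A short usage sketch (hosts are illustrative, mirroring the tests):

            settings.EGRESS_PROXY_HOSTS = ["http://proxy_1:9090", "http://proxy_2:9090"]
            session = ProxyPacerSession(username="u", password="p")
            session.proxy_address  # one of the two hosts, via random.choice()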
""" - if not settings.EGRESS_PROXY_HOSTS: - return settings.EGRESS_PROXY_HOST - return random.choice(settings.EGRESS_PROXY_HOSTS) def _change_protocol(self, url: str) -> str: diff --git a/cl/lib/tests.py b/cl/lib/tests.py index ff86a720e3..da59a87964 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -90,6 +90,9 @@ def test_auto_blocking_small_bankr_docket(self) -> None: ) +@override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] +) class TestPacerSessionUtils(TestCase): def setUp(self) -> None: @@ -117,14 +120,6 @@ def setUp(self) -> None: ex=60, ) - def test_use_default_proxy_if_list_not_available(self) -> None: - """Does ProxyPacerSession uses the default proxy when no list is provided?""" - session = ProxyPacerSession(username="test", password="password") - self.assertEqual(session.proxy_address, settings.EGRESS_PROXY_HOST) - - @override_settings( - EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] - ) def test_pick_random_proxy_when_list_is_available(self): """Does ProxyPacerSession choose a random proxy from the available list?""" session = ProxyPacerSession(username="test", password="password") @@ -143,9 +138,6 @@ def test_use_default_proxy_host_for_old_cookie_format(self): _, proxy = cookies_data self.assertEqual(proxy, settings.EGRESS_PROXY_HOST) - @override_settings( - EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] - ) @patch("cl.lib.pacer_session.log_into_pacer") def test_compute_new_cookies_with_new_format(self, mock_log_into_pacer): """Are we using the tuple format for new cookies?""" @@ -169,9 +161,6 @@ def test_parse_cookie_proxy_pair_properly(self): _, proxy = cookies_data self.assertEqual(proxy, "http://proxy_1:9090") - @override_settings( - EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] - ) @patch("cl.lib.pacer_session.log_into_pacer") def test_compute_cookies_for_almost_expired_data( self, mock_log_into_pacer From b0b6eb36af8097cab578e7ab5c843a823bc6516b Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 13:52:52 -0400 Subject: [PATCH 050/372] feat(corpus_importer): Adds an exception to the get_pacer_case_id_and_title helper --- cl/corpus_importer/tasks.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 982e097bf2..1711fd8104 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -999,10 +999,9 @@ def get_pacer_case_id_and_title( cookies, proxy_address = cookies_from_cache cookies, proxy_address = cookies_from_cache, settings.EGRESS_PROXY_HOST else: - # If the user_pk is unavailable, cookies cannot be retrieved from the - # cache. 
- self.request.chain = None - return None + raise Exception( + "user_pk is unavailable, cookies cannot be retrieved from cache" + ) s = ProxyPacerSession(cookies=cookies, proxy=proxy_address) report = PossibleCaseNumberApi(map_cl_to_pacer_id(court_id), s) From 02141d97af0b1a8bd7e3447d053d56f168a09f7a Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 15:17:30 -0400 Subject: [PATCH 051/372] feat(test): Override PROXY_HOSTS setting for tests --- cl/corpus_importer/tests.py | 3 +++ cl/recap/tests.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 6291121f39..0ff43d865d 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -401,6 +401,9 @@ def test_get_appellate_court_object_from_string(self) -> None: self.assertEqual(test["a"], got) +@override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] +) @pytest.mark.django_db class PacerDocketParserTest(TestCase): """Can we parse RECAP dockets successfully?""" diff --git a/cl/recap/tests.py b/cl/recap/tests.py index f04ee7e18e..ab63750a59 100644 --- a/cl/recap/tests.py +++ b/cl/recap/tests.py @@ -16,7 +16,7 @@ from django.core import mail from django.core.files.base import ContentFile from django.core.files.uploadedfile import SimpleUploadedFile -from django.test import RequestFactory +from django.test import RequestFactory, override_settings from django.urls import reverse from django.utils.timezone import now from juriscraper.pacer import PacerRssFeed @@ -1145,6 +1145,9 @@ def mock_bucket_open(message_id, r, read_file=False): return recap_mail_example +@override_settings( + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"] +) class RecapEmailToEmailProcessingQueueTest(TestCase): """Test the rest endpoint, but exclude the processing tasks.""" From f997bcccf160b854aa6860148e5609c8725805e9 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 12 Jul 2024 16:02:24 -0400 Subject: [PATCH 052/372] fix(tests): Override PROXY_HOSTS setting for the ScrapeIqueryPagesTest class --- cl/corpus_importer/tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 9d388f7fb8..13bf5022fc 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -3351,6 +3351,7 @@ def test_merger(self): @override_settings( IQUERY_PROBE_DAEMON_ENABLED=True, IQUERY_SWEEP_UPLOADS_SIGNAL_ENABLED=True, + EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"], ) class ScrapeIqueryPagesTest(TestCase): """Tests related to probe_iquery_pages_daemon command.""" From fb4c668a264319733f9dfb07b2f98a0bf4e18c55 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 12 Jul 2024 15:31:49 -0600 Subject: [PATCH 053/372] feat(scrape_pacer_free_opinions): sweep options --- .../commands/scrape_pacer_free_opinions.py | 120 ++++++++++++++++-- 1 file changed, 110 insertions(+), 10 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index 1b40006ab2..7c0e0d2d52 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -1,4 +1,5 @@ import argparse +import calendar import datetime import os from typing import Callable, Dict, List, Optional, Tuple, cast @@ -6,6 +7,7 @@ from celery.canvas import chain from django.conf import settings 
from django.utils.timezone import now +from juriscraper.lib.date_utils import make_date_range_tuples from juriscraper.lib.exceptions import PacerLoginException from juriscraper.lib.string_utils import CaseNameTweaker from requests import RequestException @@ -326,19 +328,116 @@ def ocr_available(options: OptionsType) -> None: logger.info(f"Sent {i + 1}/{count} tasks to celery so far.") -def do_monthly(): - # Run everything monthly range - pass +def do_quarterly(options: OptionsType): + """Collect last quarter documents + Run it every three months (0 0 1 */3 *) -def do_weekly(): - # Run everything weekly range - pass + :return: None + """ + first_day_current_month = datetime.datetime.now().replace(day=1) + + # Calculate the first day of the month three months ago + if first_day_current_month.month <= 3: + start_year = first_day_current_month.year - 1 + start_month = first_day_current_month.month + 9 + else: + start_year = first_day_current_month.year + start_month = first_day_current_month.month - 3 + start_date = datetime.date(start_year, start_month, 1) + + # Calculate the last day of the month prior to today + last_month = first_day_current_month - datetime.timedelta(days=1) + end_day = calendar.monthrange(last_month.year, last_month.month)[1] + end_date = datetime.date(last_month.year, last_month.month, end_day) + + dates = make_date_range_tuples(start_date, end_date, gap=7) + + for _start, _end in dates: + # We run this in 7-day date ranges to ingest all the information on a weekly + # basis and not wait for all the responses from three months ago to now from + # each court. This also allows us to scrape each court every 7 day range to + # avoid possible blockages. + options["date_start"] = _start # type: ignore + options["date_end"] = _end # type: ignore + do_everything(options) + + +def do_monthly(options: OptionsType): + """Collect last month's documents + + Run it on the 3rd of each month to let them update the last days of the month + (15 2 3 * *) + + :return: None + """ + today = datetime.date.today() + prev_month, current_year = ( + (today.month - 1, today.year) + if today.month != 1 + else (12, today.year - 1) + ) + month_last_day = calendar.monthrange(current_year, prev_month)[1] + start = datetime.date(current_year, prev_month, 1) + end = datetime.date(current_year, prev_month, month_last_day) + + # Update options with start and end date of previous month + options["date_start"] = start # type: ignore + options["date_end"] = end # type: ignore + + do_everything(options) + + +def do_weekly(options: OptionsType): + """Collect last week's documents + + Run it every wednesday (* * * * 3) + + :return: None + """ + + today = datetime.date.today() + weekday = today.weekday() + start_of_this_week = today - datetime.timedelta(days=weekday) + start_of_previous_week = start_of_this_week - datetime.timedelta(weeks=1) + end_of_previous_week = start_of_previous_week + datetime.timedelta(days=6) + + # Update options with start and end date of previous week + options["date_start"] = start_of_previous_week # type: ignore + options["date_end"] = end_of_previous_week # type: ignore + + do_everything(options) + + +def do_all(options: OptionsType): + """Collect all documents since the beginning of time + + It was established on this date based on the PacerFreeDocumentLog table. The first + date queried is 1950-05-12 from ca9. + + The command will be executed until the day on which it is executed. 
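    For reference, make_date_range_tuples splits the span into inclusive
    7-day windows, roughly (a sketch of juriscraper's helper, assuming its
    documented gap behavior):

        make_date_range_tuples(date(1950, 5, 1), date(1950, 5, 15), gap=7)
        # -> [(May 1, May 7), (May 8, May 14), (May 15, May 15)]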
+ + To collect all documents, weekly, monthly and quarterly sweeps will be used to make + sure we don't miss anything. + + Take note that documents could be missing if they were marked as free after these + periods. + + :return: None + """ + start = datetime.date(1950, 5, 1) + end = datetime.date.today() + dates = make_date_range_tuples(start, end, gap=7) -def do_all(): - # run all courts since first day started to query each court - pass + for _start, _end in dates: + # We run this in 7-day date ranges to ingest all the information on a weekly + # basis and not wait for all the responses from 1950 to now from each court ( + # ~3900 weeks/requests until today). This also allows us to scrape each court + # every 7 day range to avoid possible blockages. + options["date_start"] = _start # type: ignore + options["date_end"] = _end # type: ignore + do_everything(options) def do_everything(options: OptionsType): @@ -387,7 +486,7 @@ def add_arguments(self, parser: argparse.ArgumentParser) -> None: type=str, default=["all"], nargs="*", - help="The courts that you wish to parse.", + help="The courts that you wish to parse. Use cl ids.", ) parser.add_argument( "--date-start", @@ -421,6 +520,7 @@ def handle(self, *args: List[str], **options: OptionsType) -> None: "get-report-results": get_and_save_free_document_reports, "get-pdfs": get_pdfs, "ocr-available": ocr_available, + "do-quarterly": do_quarterly, "do-monthly": do_monthly, "do-weekly": do_weekly, "do-all": do_all, From a3011bb168e1d1dd1234d3e943d4a250c2269428 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 12 Jul 2024 16:14:17 -0600 Subject: [PATCH 054/372] feat(citation_importer): catch invalid volume numbers Fix start-end/end-row options Add a log message when we already have a citation from a reporter use logger instead of print --- .../commands/import_citations_csv.py | 27 +++++++++++++++---- cl/corpus_importer/utils.py | 3 +++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py index c709ab710e..f23649eeb6 100644 --- a/cl/citations/management/commands/import_citations_csv.py +++ b/cl/citations/management/commands/import_citations_csv.py @@ -38,9 +38,15 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: end_row = None if options["start_row"] and options["end_row"]: - start_row = options["start_row"] if options["start_row"] > 1 else 0 + start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 end_row = options["end_row"] - options["start_row"] + 1 # inclusive + if options["start_row"] and not options["end_row"]: + start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 + + if options["end_row"] and not options["start_row"]: + end_row = options["end_row"] + if options["limit"]: end_row = options["limit"] @@ -78,7 +84,18 @@ def process_csv_data( continue if cluster_id and citation_to_add: - add_citations_to_cluster([citation_to_add], cluster_id) + try: + add_citations_to_cluster([citation_to_add], cluster_id) + except Exception as e: + if "Field 'volume' expected" in str(e): + # Fail silently, we already know this issue + logger.info( + f"Row: {index} - Invalid volume in citation: {citation_to_add} for cluster: {cluster_id}" + ) + continue + else: + # Unknown issue + raise class Command(BaseCommand): @@ -114,15 +131,15 @@ def add_arguments(self, parser): def handle(self, *args, **options): if options["start_row"] and options["end_row"]: if 
options["start_row"] > options["end_row"]: - print("--start-row can't be greater than --end-row") + logger.info("--start-row can't be greater than --end-row") return if not os.path.exists(options["csv"]): - print(f"Csv file: {options['csv']} doesn't exist.") + logger.info(f"Csv file: {options['csv']} doesn't exist.") return data = load_citations_file(options) if not data.empty: process_csv_data(data) else: - print("CSV file empty") + logger.info("CSV file is empty or start/end row returned no rows.") diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py index c60696e795..e6551b9152 100644 --- a/cl/corpus_importer/utils.py +++ b/cl/corpus_importer/utils.py @@ -641,6 +641,9 @@ def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None: ).exists(): # Avoid adding a citation if we already have a citation from the # citation's reporter + logger.info( + f"We already have a citation from the reporter: {citation[0].corrected_reporter()} in the cluster id: {cluster_id}" + ) continue try: From 36528fdaf07f518b811fb2b9e0de8af2856e48d9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 15 Jul 2024 16:49:04 -0600 Subject: [PATCH 055/372] feat(scrape_pacer_free_opinions): optional start date when running from the beginning of time (1950) Update code to save html files --- .../commands/scrape_pacer_free_opinions.py | 9 +++- cl/corpus_importer/tasks.py | 43 ++++++++----------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index 7c0e0d2d52..4586f0c0dd 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -425,7 +425,12 @@ def do_all(options: OptionsType): :return: None """ - start = datetime.date(1950, 5, 1) + + start = ( + options["date_start"] + if options["date_start"] + else datetime.date(1950, 5, 1) + ) end = datetime.date.today() dates = make_date_range_tuples(start, end, gap=7) @@ -508,7 +513,7 @@ def handle(self, *args: List[str], **options: OptionsType) -> None: if options["date_start"] and options["date_end"]: if options["date_start"] > options["date_end"]: # type: ignore - print( + logger.info( "Error: date-end must be greater or equal than date-start option." ) return diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 697050fb99..c8bdc12a88 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -398,30 +398,25 @@ def get_and_save_free_document_report( if log_id: # We only save the html when the script is run automatically every day log = PACERFreeDocumentLog.objects.get(pk=log_id) - for result in report.responses: - if isinstance(result, dict): - response = result.get("response") - query_start = result.get("start") - query_end = result.get("end") - - if response and query_start and query_end: - pacer_file = PacerHtmlFiles( - content_object=log, - upload_type=UPLOAD_TYPE.FREE_OPINIONS_REPORT, - ) - pacer_file.filepath.save( - f"free_opinions_report_{court_id}_from_{query_start.replace('/', '-')}_to_{query_end.replace('/', '-')}.html", - ContentFile(response.text.encode()), - ) - else: - # FreeOpinionReport now returns a list of dicts with additional data - # instead of a list of requests responses. - # This is temporary while the new version of juriscraper is added to - # courtlistener - logger.info( - "New version of juriscraper not yet implemented. 
Can't "
-                    "save PacerHtmlFiles object."
-                )
 
     document_rows_to_create = []
     for row in results:

From 7c00aa1a012e50dd2b314fb48c4b7fa7ff07a02d Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Tue, 16 Jul 2024 12:49:38 -0600
Subject: [PATCH 056/372] feat(scrape_pacer_free_opinions): rename migrations

---
 ...pe_noop.py => 0015_alter_pacerhtmlfiles_upload_type_noop.py} | 2 +-
 ..._noop.sql => 0015_alter_pacerhtmlfiles_upload_type_noop.sql} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename cl/recap/migrations/{0014_alter_pacerhtmlfiles_upload_type_noop.py => 0015_alter_pacerhtmlfiles_upload_type_noop.py} (97%)
 rename cl/recap/migrations/{0014_alter_pacerhtmlfiles_upload_type_noop.sql => 0015_alter_pacerhtmlfiles_upload_type_noop.sql} (100%)

diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
similarity index 97%
rename from cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py
rename to cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
index a6b0c7ac66..9c3ea8d835 100644
--- a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.py
+++ b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
@@ -5,7 +5,7 @@ class Migration(migrations.Migration):
 
     dependencies = [
-        ("recap", "0013_processingqueue_update"),
+        ("recap", "0014_add_acms_upload_type_noop"),
     ]
 
     operations = [

diff --git a/cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.sql
similarity index 100%
rename from cl/recap/migrations/0014_alter_pacerhtmlfiles_upload_type_noop.sql
rename to cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.sql

From 0b50d675c380a2b2d92e744a7779edf85adb636e Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Tue, 16 Jul 2024 13:05:31 -0600
Subject: [PATCH 057/372] feat(scrape_pacer_free_opinions): update migration

---
 .../0015_alter_pacerhtmlfiles_upload_type_noop.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
index 9c3ea8d835..f2c53368e6 100644
--- a/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
+++ b/cl/recap/migrations/0015_alter_pacerhtmlfiles_upload_type_noop.py
@@ -30,7 +30,8 @@ class Migration(migrations.Migration):
                     (14, "Case query result page"),
                     (15, "Appellate Case query result page"),
                     (16, "ACMS docket JSON object"),
-                    (17, "Free opinions report"),
+                    (17, "ACMS attachment page JSON object"),
+                    (18, "Free opinions report"),
                 ],
                 help_text="The type of object that is uploaded",
             ),
@@ -56,7 +57,8 @@ class
Migration(migrations.Migration):
                     (14, "Case query result page"),
                     (15, "Appellate Case query result page"),
                     (16, "ACMS docket JSON object"),
-                    (17, "Free opinions report"),
+                    (17, "ACMS attachment page JSON object"),
+                    (18, "Free opinions report"),
                 ],
                 help_text="The type of object that is uploaded",
            ),

From 9c819ceaedac4004f5329b81d544024c59c9d156 Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Wed, 17 Jul 2024 11:21:35 -0600
Subject: [PATCH 058/372] feat(scrape_pacer_free_opinions): remove previous
 changes, add a delay option to control time between citations

---
 .../commands/import_citations_csv.py | 28 ++++++++-----------
 cl/corpus_importer/utils.py          |  3 --
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py
index f23649eeb6..98bd88255a 100644
--- a/cl/citations/management/commands/import_citations_csv.py
+++ b/cl/citations/management/commands/import_citations_csv.py
@@ -15,6 +15,7 @@
 """
 
 import os.path
+import time
 
 import numpy as np
 import pandas as pd
@@ -64,12 +65,11 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader:
     return data
 
 
-def process_csv_data(
-    data: DataFrame | TextFileReader,
-) -> None:
+def process_csv_data(data: DataFrame | TextFileReader, options: dict) -> None:
     """Process citations from csv file
 
     :param data: rows from csv file
+    :param options: options passed to command
     :return: None
     """
 
@@ -84,18 +84,8 @@ def process_csv_data(
             continue
 
         if cluster_id and citation_to_add:
-            try:
-                add_citations_to_cluster([citation_to_add], cluster_id)
-            except Exception as e:
-                if "Field 'volume' expected" in str(e):
-                    # Fail silently, we already know this issue
-                    logger.info(
-                        f"Row: {index} - Invalid volume in citation: {citation_to_add} for cluster: {cluster_id}"
-                    )
-                    continue
-                else:
-                    # Unknown issue
-                    raise
+            add_citations_to_cluster([citation_to_add], cluster_id)
+            time.sleep(options["delay"])
 
 
 class Command(BaseCommand):
@@ -127,6 +117,12 @@ def add_arguments(self, parser):
             help="Limit number of rows to process.",
             required=False,
         )
+        parser.add_argument(
+            "--delay",
+            type=float,
+            default=1.0,
+            help="How long to wait to add each citation (in seconds, allows floating numbers).",
+        )
 
     def handle(self, *args, **options):
         if options["start_row"] and options["end_row"]:
@@ -140,6 +136,6 @@ def handle(self, *args, **options):
 
         data = load_citations_file(options)
         if not data.empty:
-            process_csv_data(data)
+            process_csv_data(data, options)
         else:
             logger.info("CSV file is empty or start/end row returned no rows.")

diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py
index e6551b9152..c60696e795 100644
--- a/cl/corpus_importer/utils.py
+++ b/cl/corpus_importer/utils.py
@@ -641,6 +641,9 @@ def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None:
         ).exists():
             # Avoid adding a citation if we already have a citation from the
             # citation's reporter
-            logger.info(
-                f"We already have a citation from the reporter: {citation[0].corrected_reporter()} in the cluster id: {cluster_id}"
-            )
             continue
 
         try:

From dc92cd64f4073cf8b3e3b076dbcf950d6a8edb40 Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Wed, 17 Jul 2024 12:29:36 -0600
Subject: [PATCH 059/372] feat(scrape_pacer_free_opinions): update function to
 pass delay and start and end rows

---
 .../commands/import_citations_csv.py | 45 ++++++++-----------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git
a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py index 98bd88255a..c559961995 100644 --- a/cl/citations/management/commands/import_citations_csv.py +++ b/cl/citations/management/commands/import_citations_csv.py @@ -35,27 +35,20 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: :return: loaded data """ - start_row = None end_row = None - if options["start_row"] and options["end_row"]: - start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 - end_row = options["end_row"] - options["start_row"] + 1 # inclusive - - if options["start_row"] and not options["end_row"]: - start_row = options["start_row"] - 1 if options["start_row"] > 1 else 0 - - if options["end_row"] and not options["start_row"]: - end_row = options["end_row"] - - if options["limit"]: - end_row = options["limit"] + if options["end_row"] or options["limit"]: + end_row = ( + options["limit"] + if options["limit"] > options["end_row"] + else options["end_row"] + ) data = pd.read_csv( options["csv"], names=["cluster_id", "citation_to_add"], delimiter=",", - skiprows=start_row, + skiprows=options["start_row"] - 1 if options["start_row"] else None, nrows=end_row, ) @@ -65,11 +58,11 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: return data -def process_csv_data(data: DataFrame | TextFileReader, options: dict) -> None: +def process_csv_data(data: DataFrame | TextFileReader, delay_s: float) -> None: """Process citations from csv file :param data: rows from csv file - :param options: options passed to command + :param delay_s: how long to wait to add each citation :return: None """ @@ -78,14 +71,12 @@ def process_csv_data(data: DataFrame | TextFileReader, options: dict) -> None: citation_to_add = row.get("citation_to_add") if not OpinionCluster.objects.filter(id=cluster_id).exists(): - logger.info( - f"Row: {index} - Opinion cluster doesn't exist: {cluster_id}" - ) + logger.info(f"Opinion cluster doesn't exist: {cluster_id}") continue if cluster_id and citation_to_add: add_citations_to_cluster([citation_to_add], cluster_id) - time.sleep(options["delay"]) + time.sleep(delay_s) class Command(BaseCommand): @@ -102,11 +93,13 @@ def add_arguments(self, parser): ) parser.add_argument( "--start-row", + default=0, type=int, help="Start row (inclusive).", ) parser.add_argument( "--end-row", + default=0, type=int, help="End row (inclusive).", ) @@ -121,14 +114,14 @@ def add_arguments(self, parser): "--delay", type=float, default=1.0, - help="How long to wait to add each citation (in seconds, allows floating numbers).", + help="How long to wait to add each citation (in seconds, allows floating " + "numbers).", ) def handle(self, *args, **options): - if options["start_row"] and options["end_row"]: - if options["start_row"] > options["end_row"]: - logger.info("--start-row can't be greater than --end-row") - return + if options["end_row"] and options["start_row"] > options["end_row"]: + logger.info("--start-row can't be greater than --end-row") + return if not os.path.exists(options["csv"]): logger.info(f"Csv file: {options['csv']} doesn't exist.") @@ -136,6 +129,6 @@ def handle(self, *args, **options): data = load_citations_file(options) if not data.empty: - process_csv_data(data, options) + process_csv_data(data, options["delay"]) else: logger.info("CSV file is empty or start/end row returned no rows.") From 2d690b87cdf2f0016c6d6a43d7d23d2fe4ec5de8 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Wed, 17 Jul 
2024 23:34:45 -0500
Subject: [PATCH 060/372] fix(make_objects, save_everything): ingest new
 fields from juriscraper

Also, generalize overwriting prevention in `update_or_create_docket`

Remove incorrect assignment of "summaries" to OpinionCluster.syllabus

save_everything now:
- gets the Person object from Opinion.author_str to put in Opinion.author
- does not assign OpinionCluster.judges to Opinion.author when
  Opinion.author_str has an explicit value
---
 .../management/commands/cl_scrape_opinions.py | 39 +++++++++++++++----
 cl/scrapers/utils.py                          | 23 +++++------
 2 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/cl/scrapers/management/commands/cl_scrape_opinions.py b/cl/scrapers/management/commands/cl_scrape_opinions.py
index a9bf3d8b2b..aa953cb946 100644
--- a/cl/scrapers/management/commands/cl_scrape_opinions.py
+++ b/cl/scrapers/management/commands/cl_scrape_opinions.py
@@ -83,6 +83,15 @@ def make_objects(
 ) -> Tuple[Docket, Opinion, OpinionCluster, List[Citation]]:
     """Takes the meta data from the scraper and associates it with objects.
 
+    The keys returned by juriscraper scrapers are defined by `self._all_attrs`
+    on OpinionSite and OralArgumentSite, where the legacy convention is to use
+    plural names.
+
+    However, this function is also used by importers and user pages, which
+    may not respect this convention; hence the duplication of singular and
+    plural names, like in
+    `item.get("disposition") or item.get("dispositions", "")`
+
     Returns the created objects.
     """
     blocked = item["blocked_statuses"]
@@ -106,20 +115,27 @@ def make_objects(
         appeal_from_str=item.get("lower_courts", ""),
     )
 
+    # Note that if opinion.author_str has no value, and cluster.judges finds
+    # a single judge, opinion.author will be populated with that Person
+    # object. See `save_everything`
+
+    # For a discussion on syllabus vs summary, check
+    # https://github.com/freelawproject/juriscraper/issues/66
     cluster = OpinionCluster(
-        judges=item.get("judges", ""),
         date_filed=item["case_dates"],
         date_filed_is_approximate=item["date_filed_is_approximate"],
         case_name=item["case_names"],
         case_name_short=case_name_short,
         source=item.get("cluster_source") or SOURCES.COURT_WEBSITE,
         precedential_status=item["precedential_statuses"],
-        nature_of_suit=item.get("nature_of_suit", ""),
-        summary=item.get("summary", ""),
         blocked=blocked,
         date_blocked=date_blocked,
+        judges=item.get("judges", ""),
+        nature_of_suit=item.get("nature_of_suit", ""),
+        disposition=item.get("disposition") or item.get("dispositions", ""),
+        other_dates=item.get("other_dates", ""),
+        summary=item.get("summary", ""),
         syllabus=item.get("summaries", ""),
-        disposition=item.get("disposition") or item.get("dispositions", "")
     )
 
     cites = [item.get(key, "") for key in ["citations", "parallel_citations"]]
@@ -139,7 +155,7 @@ def make_objects(
         download_url=url,
         joined_by_str=item.get("joined_by", ""),
         per_curiam=item.get("per_curiam", False),
-        author_str=item.get("author_str") or item.get("authors", "")
+        author_str=item.get("author_str") or item.get("authors", ""),
     )
 
     cf = ContentFile(content)
@@ -168,14 +184,21 @@ def save_everything(
         citation.cluster_id = cluster.pk
         citation.save()
 
+    if opinion.author_str:
+        candidate = async_to_sync(lookup_judges_by_messy_str)(
+            opinion.author_str, docket.court.pk, cluster.date_filed
+        )
+        if candidate:
+            opinion.author = candidate[0]
+
     if cluster.judges:
         candidate_judges = async_to_sync(lookup_judges_by_messy_str)(
             cluster.judges, docket.court.pk, cluster.date_filed
        )
 
-        if len(candidate_judges) == 1:
-            opinion.author =
candidate_judges[0] - if len(candidate_judges) > 1: + if len(candidate_judges) == 1 and not opinion.author_str: + opinion.author = candidate_judges[0] + elif len(candidate_judges) > 1: for candidate in candidate_judges: cluster.panel.add(candidate) diff --git a/cl/scrapers/utils.py b/cl/scrapers/utils.py index 4355e3bc7b..f5d5177436 100644 --- a/cl/scrapers/utils.py +++ b/cl/scrapers/utils.py @@ -323,6 +323,7 @@ def update_or_create_docket( "ia_needs_upload": ia_needs_upload, "appeal_from_str": appeal_from_str, "date_blocked": date_blocked, + "date_argued": date_argued, } docket = async_to_sync(find_docket_object)(court_id, None, docket_number) @@ -330,25 +331,25 @@ def update_or_create_docket( # Update the existing docket with the new values docket.add_opinions_source(source) - # Prevent overwriting Docket.date_argued if it exists - if date_argued: - if docket.date_argued and date_argued != docket.date_argued: + for field, value in docket_fields.items(): + if not value: + continue + if getattr(docket, field) and getattr(docket, field) != value: + # Prevent overwriting values that already exist, since default values + # to this function are empty strings or None logger.error( - "Docket %s already has a date_argued %s, different than new date %s", + "Docket %s already has a %s %s, different than new value %s", docket.pk, - docket.date_argued, - date_argued, + field, + getattr(docket, field), + value, ) else: - docket.date_argued = date_argued - - for field, value in docket_fields.items(): - setattr(docket, field, value) + setattr(docket, field, value) else: # Create a new docket with docket_fields and additional fields docket = Docket( **docket_fields, - date_argued=date_argued, source=source, docket_number=docket_number, court_id=court_id, From 45e48b6f31eab03a69522e088fa7178c6a6dd2e9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 18 Jul 2024 20:44:03 -0600 Subject: [PATCH 061/372] feat(import_citations_csv): implement reindex argument for command improve the code of the command in general refactor add_citations_to_cluster function --- .../commands/import_citations_csv.py | 54 +++++++++++++---- cl/corpus_importer/utils.py | 60 ++++++++++++------- 2 files changed, 81 insertions(+), 33 deletions(-) diff --git a/cl/citations/management/commands/import_citations_csv.py b/cl/citations/management/commands/import_citations_csv.py index c559961995..5451a6e67a 100644 --- a/cl/citations/management/commands/import_citations_csv.py +++ b/cl/citations/management/commands/import_citations_csv.py @@ -10,14 +10,20 @@ How to run the command: manage.py import_citations_csv --csv /opt/courtlistener/cl/assets/media/wl_citations_1.csv +# Add all citations from the file and reindex existing ones +manage.py import_citations_csv --csv /opt/courtlistener/cl/assets/media/wl_citations_1.csv --reindex + +# Add and index all citations from the file starting from row 2600000 and reindex existing ones +manage.py import_citations_csv --csv /opt/courtlistener/cl/assets/media/x.csv --start-row 2600000 --delay 0.1 + Note: If --limit is greater than --end-row, end row will be ignored """ +import argparse import os.path import time -import numpy as np import pandas as pd from django.core.management import BaseCommand from pandas import DataFrame @@ -37,6 +43,8 @@ def load_citations_file(options: dict) -> DataFrame | TextFileReader: end_row = None + dtype_mapping = {"cluster_id": "int", "citation_to_add": "str"} + if options["end_row"] or options["limit"]: end_row = ( options["limit"] @@ -47,27 +55,30 @@ def 
load_citations_file(options: dict) -> DataFrame | TextFileReader: data = pd.read_csv( options["csv"], names=["cluster_id", "citation_to_add"], + dtype=dtype_mapping, delimiter=",", skiprows=options["start_row"] - 1 if options["start_row"] else None, nrows=end_row, + na_filter=False, ) - # Replace nan in dataframe - data = data.replace(np.nan, "", regex=True) logger.info(f"Found {len(data.index)} rows in csv file: {options['csv']}") return data -def process_csv_data(data: DataFrame | TextFileReader, delay_s: float) -> None: +def process_csv_data( + data: DataFrame | TextFileReader, delay_s: float, reindex: bool +) -> None: """Process citations from csv file :param data: rows from csv file :param delay_s: how long to wait to add each citation + :param reindex: force reindex of citations :return: None """ for index, row in data.iterrows(): - cluster_id = int(row.get("cluster_id")) + cluster_id = row.get("cluster_id") citation_to_add = row.get("citation_to_add") if not OpinionCluster.objects.filter(id=cluster_id).exists(): @@ -75,7 +86,7 @@ def process_csv_data(data: DataFrame | TextFileReader, delay_s: float) -> None: continue if cluster_id and citation_to_add: - add_citations_to_cluster([citation_to_add], cluster_id) + add_citations_to_cluster([citation_to_add], cluster_id, reindex) time.sleep(delay_s) @@ -85,9 +96,22 @@ class Command(BaseCommand): def __init__(self, *args, **kwargs): super(Command, self).__init__(*args, **kwargs) + def existing_path_type(self, path: str): + """Validate file path exists + + :param path: path to validate + :return: valid path + """ + if not os.path.exists(path): + raise argparse.ArgumentTypeError( + f"Csv file: {path} doesn't exist." + ) + return path + def add_arguments(self, parser): parser.add_argument( "--csv", + type=self.existing_path_type, help="Absolute path to a CSV file containing the citations to add.", required=True, ) @@ -117,18 +141,22 @@ def add_arguments(self, parser): help="How long to wait to add each citation (in seconds, allows floating " "numbers).", ) + parser.add_argument( + "--reindex", + action="store_true", + default=False, + help="Reindex citations if they are already in the system", + ) def handle(self, *args, **options): if options["end_row"] and options["start_row"] > options["end_row"]: logger.info("--start-row can't be greater than --end-row") return - if not os.path.exists(options["csv"]): - logger.info(f"Csv file: {options['csv']} doesn't exist.") - return - data = load_citations_file(options) - if not data.empty: - process_csv_data(data, options["delay"]) - else: + + if data.empty: logger.info("CSV file is empty or start/end row returned no rows.") + return + + process_csv_data(data, options["delay"], options["reindex"]) diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py index c60696e795..3e032d08dc 100644 --- a/cl/corpus_importer/utils.py +++ b/cl/corpus_importer/utils.py @@ -612,11 +612,14 @@ def merge_overlapping_data( return data_to_update -def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None: +def add_citations_to_cluster( + cites: list[str], cluster_id: int, save_again_if_exists: bool = False +) -> None: """Add string citations to OpinionCluster if it has not yet been added :param cites: citation list :param cluster_id: cluster id related to citations + :param save_again_if_exists: force save citation if it already exists :return: None """ for cite in cites: @@ -636,29 +639,46 @@ def add_citations_to_cluster(cites: list[str], cluster_id: int) -> None: cite_type_str = 
citation[0].all_editions[0].reporter.cite_type reporter_type = map_reporter_db_cite_type(cite_type_str) - if Citation.objects.filter( - cluster_id=cluster_id, reporter=citation[0].corrected_reporter() - ).exists(): - # Avoid adding a citation if we already have a citation from the - # citation's reporter - continue - - try: - o, created = Citation.objects.get_or_create( - volume=citation[0].groups["volume"], - reporter=citation[0].corrected_reporter(), - page=citation[0].groups["page"], - type=reporter_type, + citation_params = { + "volume": citation[0].groups["volume"], + "reporter": citation[0].corrected_reporter(), + "page": citation[0].groups["page"], + "type": reporter_type, + "cluster_id": cluster_id, + } + citation_obj = Citation.objects.filter(**citation_params).first() + if citation_obj: + if save_again_if_exists: + # We already have the citation for the cluster and want to reindex it + citation_obj.save() + logger.info( + f"Reindexing: {cite} added to cluster id: {cluster_id}" + ) + else: + # Ignore and go to the next citation in the list + continue + else: + if Citation.objects.filter( cluster_id=cluster_id, - ) - if created: + reporter=citation[0].corrected_reporter(), + ).exists(): + # Avoid adding a citation if we already have a citation from the + # citation's reporter. + logger.info( + f"Can't add: {cite} to cluster id: {cluster_id}. There is already " + f"a citation from that reporter." + ) + continue + try: + # We don't have the citation or any citation from the reporter + Citation.objects.create(**citation_params) logger.info( f"New citation: {cite} added to cluster id: {cluster_id}" ) - except IntegrityError: - logger.warning( - f"Reporter mismatch for cluster: {cluster_id} on cite: {cite}" - ) + except IntegrityError: + logger.warning( + f"Reporter mismatch for cluster: {cluster_id} on cite: {cite}" + ) def update_cluster_panel( From 594737e11c99561e025578f1033e96c8a7bfa40d Mon Sep 17 00:00:00 2001 From: Jason Hopper Date: Fri, 19 Jul 2024 11:07:35 -0300 Subject: [PATCH 062/372] adding missing tables, setting import order to avoid foreign key constraint violations --- scripts/make_bulk_data.sh | 179 +++++++++++++++++++++++++++++++------- 1 file changed, 149 insertions(+), 30 deletions(-) diff --git a/scripts/make_bulk_data.sh b/scripts/make_bulk_data.sh index 3a4a28f58e..56e0844a00 100755 --- a/scripts/make_bulk_data.sh +++ b/scripts/make_bulk_data.sh @@ -184,6 +184,10 @@ politicalaffiliation_fields='( )' politicalaffiliation_csv_filename="people-db-political-affiliations-$(date -I).csv" +# people_db_race +people_db_race_fields='(id, race)' +people_db_race_csv_filename="people_db_race-$(date -I).csv" + # people_db_person_race people_db_person_race_fields='( id, person_id, race_id @@ -258,38 +262,151 @@ disclosures_debt_fields='( )' disclosures_debt_csv_filename="financial-disclosures-debts-$(date -I).csv" + +people_db_attorneyorganization_fields='( + id, date_created, date_modified, lookup_key, name, address1, address2, city, state, zip_code + )' +people_db_attorneyorganization_csv_filename="people_db_attorneyorganization-$(date -I).csv" + + +people_db_attorney_fields='( + id, date_created, date_modified, name, contact_raw, phone, fax, email + )' +people_db_attorney_csv_filename="people_db_attorney-$(date -I).csv" + + +people_db_party_fields='( + id, date_created, date_modified, name, extra_info + )' +people_db_party_csv_filename="people_db_party-$(date -I).csv" + + +docket_fields='( + id, date_created, date_modified, date_cert_granted, date_cert_denied, date_argued, + 
date_reargued, date_reargument_denied, case_name_short, case_name, case_name_full, slug, + docket_number, blocked, court_id, assigned_to_id, cause, date_filed, date_list_filing, + date_terminated, filepath_ia, filepath_local, jurisdiction_type, jury_demand, + nature_of_suit, pacer_case_id, referred_to_id, source, assigned_to_str, view_count, + date_last_index, appeal_from_str, appellate_case_type_information, + appellate_fee_status, panel_str, originating_court_information_id, mdl_status, + filepath_ia_json, ia_date_first_change, ia_needs_upload, ia_upload_failure_count, + docket_number_core, idb_data_id + )' +dockets_csv_filename="search_docket-$(date -I).csv" + + + +people_db_partytype_fields='( + id, name, docket_id, party_id, date_terminated, extra_info, + highest_offense_level_opening, highest_offense_level_terminated + )' +people_db_partytype_csv_filename="people_db_partytype-$(date -I).csv" + + +fjcintegrateddatabase_fields='( + id, dataset_source, date_created, date_modified, office, docket_number, origin, date_filed, + jurisdiction, nature_of_suit, title, section, subsection, diversity_of_residence, class_action, + monetary_demand, county_of_residence, arbitrarion_at_filing, arbitration_at_termination, + multidistrict_litigation_docket_number, plaintiff, defendant, date_transfer, transfer_office, + transfer_docket_number, transfer_oprigin, date_terminated, termination_class_action_status, + procedural_progress, disposition, nature_of_judgement, amount_recieved, judgment, pro_se, + year_of_tape, circuit_id, district_id, nature_of_offense, version + )' +fjcintegrateddatabase_csv_filename="recap_fjcintegrateddatabase-$(date -I).csv" + + +people_db_criminalcount_fields='( + id, date_created, date_modified, creditor_name, description, + value_code, redacted, financial_disclosure_id + )' +people_db_criminalcount_csv_filename="people_db_criminalcount-$(date -I).csv" + + +people_db_criminalcomplaint_fields='( + id, name, disposition, status, party_type_id + )' +people_db_criminalcomplaint_csv_filename="people_db_criminalcomplaint-$(date -I).csv" + + +people_db_role_fields='( + id, role, date_action, attorney_id, docket_id, party_id, role_raw + )' +people_db_role_csv_filename="people_db_role-$(date -I).csv" + + +people_db_attorneyorganizationassociation_fields='( + id, attorney_id, attorney_organization_id, docket_id + )' +people_db_attorneyorganizationassociation_csv_filename="people_db_attorneyorganizationassociation-$(date -I).csv" + + +search_docketentry_fields='( + id, date_created_date_modified, date_filed, entry_number, description, docket_id, + pacer_sequence_number, recap_sequence_number + )' +search_docketentry_csv_filename="search_docketentry-$(date -I).csv" + + +search_opinioncluster_panel_fields='( + id, opinioncluster_id, person_id + )' +search_opinioncluster_panel_csv_filename="search_opinioncluster_panel-$(date -I).csv" + + +search_opinioncluster_non_participating_judges_fields='( + id, opinioncluster_id, person_id + )' +search_opinioncluster_non_participating_judges_csv_filename="search_opinioncluster_non_participating_judges-$(date -I).csv" + # If you add or remove a table, you need to update this number -NUM_TABLES=28 +NUM_TABLES=42 # Every new table added to bulk script should be added as an associative array -declare -a t_1=("search_court" "$court_fields" "$court_csv_filename") -declare -a t_2=("search_courthouse" "$courthouse_fields" "$courthouse_csv_filename") -declare -a t_3=("search_court_appeals_to" "$court_appeals_to_fields" "$court_appeals_to_csv_filename") 
-declare -a t_4=("search_docket" "$docket_fields" "$dockets_csv_filename") -declare -a t_5=("search_originatingcourtinformation" "$originatingcourtinformation_fields" "$originatingcourtinformation_csv_filename") +# This ordering is important. Tables with foreign key constraints must be loaded in order. +declare -a t_1=("people_db_person" "$people_db_person_fields" "$people_db_person_csv_filename") +declare -a t_2=("people_db_race" "$people_db_race_fields" "$people_db_race_csv_filename") +declare -a t_3=("people_db_school" "$people_db_school_fields" "$people_db_school_csv_filename") +declare -a t_4=("search_court" "$court_fields" "$court_csv_filename") +declare -a t_5=("people_db_position" "$people_db_position_fields" "$people_db_position_csv_filename") declare -a t_6=("recap_fjcintegrateddatabase" "$fjcintegrateddatabase_fields" "$fjcintegrateddatabase_csv_filename") -declare -a t_7=("search_opinioncluster" "$opinioncluster_fields" "$opinioncluster_csv_filename") -declare -a t_8=("search_opinion" "$opinion_fields" "$opinions_csv_filename") -declare -a t_9=("search_opinionscited" "$opinionscited_fields" "$opinionscited_csv_filename") -declare -a t_10=("search_citation" "$citation_fields" "$citations_csv_filename") -declare -a t_11=("search_parenthetical" "$parentheticals_fields" "$parentheticals_csv_filename") -declare -a t_12=("audio_audio" "$oralarguments_fields" "$oralarguments_csv_filename") -declare -a t_13=("people_db_person" "$people_db_person_fields" "$people_db_person_csv_filename") -declare -a t_14=("people_db_school" "$people_db_school_fields" "$people_db_school_csv_filename") -declare -a t_15=("people_db_position" "$people_db_position_fields" "$people_db_position_csv_filename") -declare -a t_16=("people_db_retentionevent" "$people_db_retentionevent_fields" "$people_db_retentionevent_csv_filename") -declare -a t_17=("people_db_education" "$people_db_education_fields" "$people_db_education_csv_filename") -declare -a t_18=("people_db_politicalaffiliation" "$politicalaffiliation_fields" "$politicalaffiliation_csv_filename") -declare -a t_19=("people_db_person_race" "$people_db_person_race_fields" "$people_db_person_race_csv_filename") -declare -a t_20=("disclosures_financialdisclosure" "$financialdisclosure_fields" "$financialdisclosure_csv_filename") -declare -a t_21=("disclosures_investment" "$investment_fields" "$investment_csv_filename") -declare -a t_22=("disclosures_position" "$disclosures_position_fields" "$disclosures_position_csv_filename") -declare -a t_23=("disclosures_agreement" "$disclosures_agreement_fields" "$disclosures_agreement_csv_filename") -declare -a t_24=("disclosures_noninvestmentincome" "$noninvestmentincome_fields" "$noninvestmentincome_csv_filename") -declare -a t_25=("disclosures_spouseincome" "$spouseincome_fields" "$spouseincome_csv_filename") -declare -a t_26=("disclosures_reimbursement" "$disclosures_reimbursement_fields" "$disclosures_reimbursement_csv_filename") -declare -a t_27=("disclosures_gift" "$disclosures_gift_fields" "$disclosures_gift_csv_filename") -declare -a t_28=("disclosures_debt" "$disclosures_debt_fields" "$disclosures_debt_csv_filename") +declare -a t_7=("search_originatingcourtinformation" "$originatingcourtinformation_fields" "$originatingcourtinformation_csv_filename") + +declare -a t_8=("people_db_attorneyorganization" "$people_db_attorneyorganization_fields" "$people_db_attorneyorganization_csv_filename") +declare -a t_9=("people_db_attorney" "$people_db_attorney_fields" "$people_db_attorney_csv_filename") +declare -a 
t_10=("people_db_party" "$people_db_party_fields" "$people_db_party_csv_filename") +declare -a t_11=("search_docket" "$docket_fields" "$dockets_csv_filename") +declare -a t_12=("search_opinioncluster" "$opinioncluster_fields" "$opinioncluster_csv_filename") +declare -a t_13=("people_db_partytype" "$people_db_partytype_fields" "$people_db_partytype_csv_filename") +declare -a t_14=("recap_fjcintegrateddatabase" "$fjcintegrateddatabase_fields" "$fjcintegrateddatabase_csv_filename") +declare -a t_15=("people_db_criminalcount" "$people_db_criminalcount_fields" "$people_db_criminalcount_csv_filename") +declare -a t_16=("people_db_criminalcomplaint" "$people_db_criminalcomplaint_fields" "$people_db_criminalcomplaint_csv_filename") +declare -a t_17=("people_db_role" "$people_db_role_fields" "$people_db_role_csv_filename") +declare -a t_18=("people_db_attorneyorganizationassociation" "$people_db_attorneyorganizationassociation_fields" "$people_db_attorneyorganizationassociation_csv_filename") +declare -a t_19=("search_docketentry" "$search_docketentry_fields" "$search_docketentry_csv_filename") +declare -a t_20=("search_opinioncluster_panel" "$search_opinioncluster_panel_fields" "$search_opinioncluster_panel_csv_filename") +declare -a t_21=("search_opinioncluster_non_participating_judges" "$search_opinioncluster_non_participating_judges_fields" "$search_opinioncluster_non_participating_judges_csv_filename") + +declare -a t_22=("search_opinion" "$opinion_fields" "$opinions_csv_filename") +declare -a t_23=("search_opinion_joined_by" "$search_opinion_joined_by_fields" "$search_opinion_joined_by_csv_filename") +declare -a t_24=("search_courthouse" "$courthouse_fields" "$courthouse_csv_filename") +declare -a t_25=("search_court_appeals_to" "$court_appeals_to_fields" "$court_appeals_to_csv_filename") +declare -a t_26=("search_opinionscited" "$opinionscited_fields" "$opinionscited_csv_filename") +declare -a t_27=("search_citation" "$citation_fields" "$citations_csv_filename") +declare -a t_28=("search_parenthetical" "$parentheticals_fields" "$parentheticals_csv_filename") +declare -a t_29=("audio_audio" "$oralarguments_fields" "$oralarguments_csv_filename") +declare -a t_30=("people_db_retentionevent" "$people_db_retentionevent_fields" "$people_db_retentionevent_csv_filename") +declare -a t_31=("people_db_education" "$people_db_education_fields" "$people_db_education_csv_filename") +declare -a t_32=("people_db_politicalaffiliation" "$politicalaffiliation_fields" "$politicalaffiliation_csv_filename") +declare -a t_33=("people_db_person_race" "$people_db_person_race_fields" "$people_db_person_race_csv_filename") +declare -a t_34=("disclosures_financialdisclosure" "$financialdisclosure_fields" "$financialdisclosure_csv_filename") +declare -a t_35=("disclosures_investment" "$investment_fields" "$investment_csv_filename") +declare -a t_36=("disclosures_position" "$disclosures_position_fields" "$disclosures_position_csv_filename") +declare -a t_37=("disclosures_agreement" "$disclosures_agreement_fields" "$disclosures_agreement_csv_filename") +declare -a t_38=("disclosures_noninvestmentincome" "$noninvestmentincome_fields" "$noninvestmentincome_csv_filename") +declare -a t_39=("disclosures_spouseincome" "$spouseincome_fields" "$spouseincome_csv_filename") +declare -a t_40=("disclosures_reimbursement" "$disclosures_reimbursement_fields" "$disclosures_reimbursement_csv_filename") +declare -a t_41=("disclosures_gift" "$disclosures_gift_fields" "$disclosures_gift_csv_filename") +declare -a t_42=("disclosures_debt" 
"$disclosures_debt_fields" "$disclosures_debt_csv_filename") # Create a new array with the data of each associative array declare -a listOfLists @@ -305,7 +422,7 @@ echo "Streaming ${lst[0]} to S3" psql \ --command \ "set statement_timeout to 0; - COPY ${lst[0]} ${lst[1]} TO STDOUT WITH (FORMAT csv, ENCODING utf8, HEADER, FORCE_QUOTE *)" \ + COPY ${lst[0]} ${lst[1]} TO STDOUT WITH (FORMAT csv, ENCODING utf8, HEADER, QUOTE '`', FORCE_QUOTE *)" \ --quiet \ --host "$DB_HOST" \ --username "$DB_USER" \ @@ -324,6 +441,8 @@ pg_dump \ --table 'search_*' \ --table 'people_db_*' \ --table 'audio_*' \ + --table 'recap_*' \ + --table 'disclosures_*' \ --no-privileges \ --no-publications \ --no-subscriptions courtlistener | \ @@ -384,7 +503,7 @@ declare -a lst="$group" cat >> "$OUT" <<- EOF echo "Loading ${lst[2]} to database" psql --command \ -"COPY public.${lst[0]} ${lst[1]} FROM '\$BULK_DIR/${lst[2]}' WITH (FORMAT csv, ENCODING utf8, HEADER)" \ +"COPY public.${lst[0]} ${lst[1]} FROM '\$BULK_DIR/${lst[2]}' WITH (FORMAT csv, ENCODING utf8, QUOTE '`', HEADER)" \ --host "\$BULK_DB_HOST" \ --username "\$BULK_DB_USER" \ --dbname "\$BULK_DB_NAME" From 9d502c79a737f3d4b3deb7d62592d850590c033f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:12:26 +0000 Subject: [PATCH 063/372] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/make_bulk_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/make_bulk_data.sh b/scripts/make_bulk_data.sh index 56e0844a00..24818761e4 100755 --- a/scripts/make_bulk_data.sh +++ b/scripts/make_bulk_data.sh @@ -306,7 +306,7 @@ people_db_partytype_csv_filename="people_db_partytype-$(date -I).csv" fjcintegrateddatabase_fields='( id, dataset_source, date_created, date_modified, office, docket_number, origin, date_filed, jurisdiction, nature_of_suit, title, section, subsection, diversity_of_residence, class_action, - monetary_demand, county_of_residence, arbitrarion_at_filing, arbitration_at_termination, + monetary_demand, county_of_residence, arbitrarion_at_filing, arbitration_at_termination, multidistrict_litigation_docket_number, plaintiff, defendant, date_transfer, transfer_office, transfer_docket_number, transfer_oprigin, date_terminated, termination_class_action_status, procedural_progress, disposition, nature_of_judgement, amount_recieved, judgment, pro_se, From d7132ec90bf778ae9f28855b31724b4a21bee33e Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 19 Jul 2024 10:57:20 -0600 Subject: [PATCH 064/372] fix(opinion_order): update poetry.lock --- poetry.lock | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index f7f6c67e40..4d48c0c2ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1116,6 +1116,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5461,4 +5472,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "e6d34875888f1687912d03d33ea68038bba6c6d487037c6454d5b18449ec6d0c" +content-hash = "5334f16d006f7486a5f9b905906f2a9a68e7f524684c04af3d0994ebd0999384" From 2013633d8c5b87d6fc7dfc4dfc4c701ad0fb18c2 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 19 Jul 2024 12:22:47 -0600 Subject: [PATCH 065/372] refactor(update_opinions_order): refactor code --- .../commands/update_opinions_order.py | 425 +----------------- 1 file changed, 20 insertions(+), 405 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 5b86c98130..85ed93e0e2 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,15 +1,20 @@ import os.path import re -from typing import Any, Optional +from typing import Optional -from bs4 import BeautifulSoup, NavigableString, Tag +from bs4 import BeautifulSoup from django.core.management import BaseCommand from django.db import transaction from django.db.models import Count -from cl.corpus_importer.utils import compare_documents, similarity_scores +from cl.corpus_importer.import_columbia.columbia_utils import ( + extract_columbia_opinions, + map_opinion_types, + process_extracted_opinions, + read_xml_to_soup, +) +from cl.corpus_importer.utils import EmptyOpinionException, match_opinion_lists from cl.lib.command_utils import logger -from cl.lib.string_diff import get_cosine_similarity from cl.search.models import SOURCES, Opinion, OpinionCluster VALID_COLUMBIA_SOURCES = [ @@ -23,393 +28,6 @@ ] -# TODO remove the funcitions below and import them from utils.py and columbia_utils.py when those changes get merged - -SIMPLE_TAGS = [ - "attorneys", - "caption", - "citation", - "court", - "date", - "docket", - "hearing_date", - "panel", - "posture", - "reporter_caption", -] - - -class EmptyOpinionException(Exception): - """An exception for opinions that raise a ZeroDivisionError Exception due empty - opinion tag or empty opinion content in cl""" - - def __init__(self, message: str) -> None: - self.message = message - - -def read_xml_to_soup(filepath: str) -> BeautifulSoup: - """This function reads the xml file, fixes the bad tags in columbia xml - files and returns a BeautifulSoup object - - :param filepath: path to xml file - :return: BeautifulSoup object of parsed content - """ - with open(filepath, "r", encoding="utf-8") as f: - file_content = f.read() - # Sometimes opening and ending tag mismatch (e.g. 
ed7c6b39dcb29c9c.xml) - file_content = file_content.replace( - "", "" - ) - # Fix opinion with invalid attribute - if "" in file_content: - file_content = file_content.replace( - "", "" - ) - file_content = file_content.replace("", "").replace( - "", "" - ) - return BeautifulSoup(file_content, "lxml") - - -def add_floating_opinion( - opinions: list, floating_content: list, opinion_order: int -) -> list: - """We have found floating opinions in bs object, we keep the opinion - content as a new opinion - - :param opinions: a list with opinions found - :param floating_content: content that is not in known non-opinion tags - :param opinion_order: opinion position - :return: updated list of opinions - """ - op_type = "opinion" - if opinions: - if opinions[-1].get("type"): - # Use type of previous opinion if exists - op_type = opinions[-1].get("type") - - # Get rid of double spaces from floating content - opinion_content = re.sub( - " +", " ", "\n".join(floating_content) - ).strip() # type: str - if opinion_content: - opinions.append( - { - "opinion": opinion_content, - "order": opinion_order, - "byline": "", - "type": op_type, - } - ) - return opinions - - -def extract_columbia_opinions( - outer_opinion: BeautifulSoup, -) -> list[Optional[dict]]: - """We extract all possible opinions from BeautifulSoup, with and without - author, and we create new opinions if floating content exists(content that - is not explicitly defined within an opinion tag or doesn't have an author) - - :param outer_opinion: element containing all xml tags - :return: list of opinion dicts - """ - opinions: list = [] - floating_content = [] - order = 0 - - # We iterate all content to look for all possible opinions - for i, content in enumerate(outer_opinion): # type: int, Tag - if isinstance(content, NavigableString): - # We found a raw string, store it - floating_content.append(str(content)) - else: - if content.name in SIMPLE_TAGS + [ - "citation_line", - "opinion_byline", - "dissent_byline", - "concurrence_byline", - ]: - # Ignore these tags, it will be processed later - continue - elif content.name in [ - "opinion_text", - "dissent_text", - "concurrence_text", - ]: - if floating_content: - # We have found an opinion, but there is floating - # content, we create a dict with the opinion using the - # floating content with default type = "opinion" - opinions = add_floating_opinion( - opinions, floating_content, order - ) - floating_content = [] - - byline = content.find_previous_sibling() - opinion_author = "" - if byline and "_byline" in byline.name: - opinion_author = byline.get_text() - - opinion_content = re.sub( - " +", " ", content.decode_contents() - ).strip() - if opinion_content: - # Now we create a dict with current opinion - opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": opinion_author, - "type": content.name.replace("_text", ""), - } - ) - order = order + 1 - - else: - if content.name not in SIMPLE_TAGS + ["syllabus"]: - # We store content that is not inside _text tag and is - # not in one of the known non-opinion tags - floating_content.append(str(content)) - - # Combine the new content into another opinion. great. 
- if floating_content: - # If we end to go through all the found opinions and if we still - # have floating content out there, we create a new opinion with the - # last type of opinion - opinions = add_floating_opinion(opinions, floating_content, order) - return opinions - - -def is_per_curiam_opinion( - content: Optional[str], byline: Optional[str] -) -> bool: - """Check if opinion author is per curiam - :param content: opinion content - :param byline: opinion text author - :return: True if opinion author is per curiam - """ - if byline and "per curiam" in byline[:1000].lower(): - return True - if content and "per curiam" in content[:1000].lower(): - return True - return False - - -def merge_opinions( - opinions: list, content: list, current_order: int -) -> tuple[list, int]: - """Merge last and previous opinion if are the same type or create a new - opinion if merge is not possible - - :param opinions: list of opinions that is being updated constantly - :param content: list of opinions without an author - :param current_order: opinion position - :return: updated list of opinions - """ - - # We check if the previous stored opinion matches the type of the - # content, and we store the opinion dict temporary - relevant_opinions = ( - [opinions[-1]] - if opinions and opinions[-1]["type"] == content[0].get("type") - else [] - ) - - if relevant_opinions: - relevant_opinions[-1]["opinion"] += "\n" + "\n".join( - [f.get("opinion") for f in content if f.get("opinion")] - ) - - else: - # No relevant opinions found, create a new opinion with the content - opinion_content = "\n".join( - [f.get("opinion") for f in content if f.get("opinion")] - ) - new_opinion = { - "byline": None, - "type": content[0].get("type"), - "opinion": opinion_content, - "order": current_order, - "per_curiam": is_per_curiam_opinion(opinion_content, None), - } - opinions.append(new_opinion) - current_order = current_order + 1 - - return opinions, current_order - - -def process_extracted_opinions(extracted_opinions: list) -> list: - """We read the extracted data in extract_opinions function to merge all - possible floating opinions (it is not explicitly defined within an opinion - tag or doesn't have an author) - - :param extracted_opinions: list of opinions obtained from xml file - :return: a list with extracted and processed opinions - """ - - opinions: list = [] - authorless_content = [] - order = 0 - - for i, found_content in enumerate(extracted_opinions, start=1): - byline = found_content.get("byline") - if not byline: - # Opinion has no byline, store opinion content - authorless_content.append(found_content) - - if byline: - # Opinion has byline, get opinion type and content - opinion_type = found_content.get("type") - opinion_content = found_content.get("opinion", "") - # Store content that doesn't match the current opinion type - alternative_authorless_content = [ - content - for content in authorless_content - if content.get("type") != opinion_type - ] - # Keep content that matches the current type - authorless_content = [ - op_content - for op_content in authorless_content - if op_content.get("type") == opinion_type - ] - - if alternative_authorless_content: - # Keep floating text that are not from the same type, - # we need to create a separate opinion for those, - # for example: in 2713f39c5a8e8684.xml we have an opinion - # without an author, and the next opinion with an author is - # a dissent opinion, we can't combine both - opinions, order = merge_opinions( - opinions, alternative_authorless_content, order - ) 
- - opinion_content = ( - "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("type") == opinion_type - ] - ) - + "\n\n" - + opinion_content - ) - - # Add new opinion - new_opinion = { - "byline": byline, - "type": opinion_type, - "opinion": opinion_content, - "order": order, - "per_curiam": is_per_curiam_opinion(opinion_content, byline), - } - - opinions.append(new_opinion) - order = order + 1 - authorless_content = [] - - if len(extracted_opinions) == i and authorless_content: - # If is the last opinion, and we still have opinions without - # byline, create an opinion without an author and the contents - # that couldn't be merged - opinions, order = merge_opinions( - opinions, authorless_content, order - ) - - return opinions - - -def map_opinion_types(opinions=None) -> None: - """Map opinion type to model field choice - - :param opinions: a list that contains all opinions as dict elements - :return: None - """ - - if opinions is None: - opinions = [] - lead = False - for op in opinions: - op_type = op.get("type") - # Only first opinion with "opinion" type is a lead opinion, the next - # opinion with "opinion" type is an addendum - if not lead and op_type and op_type == "opinion": - lead = True - op["type"] = "020lead" - continue - elif lead and op_type and op_type == "opinion": - op["type"] = "050addendum" - elif op_type and op_type == "dissent": - op["type"] = "040dissent" - elif op_type and op_type == "concurrence": - op["type"] = "030concurrence" - - -def match_opinion_lists( - file_opinions_list: list[Any], cl_opinions_list: list[Any] -) -> dict[int, int]: - """Try to match the opinions on two lists and generate a dict with position of - matching opinions - - Remove non-alphanumeric and non-whitespace characters from lowercased text, - this tries to make both texts in equal conditions to prove if both are similar or - equal - - get_cosine_similarity works great when both texts are almost the same with very - small variations - - Sometimes cosine similarity fails when there are small variations in text, - such as parties, attorneys, case name, or court that are included in the content - of the opinion, compare_documents() checks the percentage of the file opinion - text that it is in courtlistener opinion, having a large percentage means that - almost all the file opinion is in courtlistener opinion, but there is a - possibility that the courtlistener opinion contains some additional data in que - opinion content (such as case name, parties, etc.) - - compare_documents works good when the opinion from the file is a subset of the - opinion in CL, the percentage represents how much of the opinion of the file is - in the opinion from cl (content in cl opinion can have other data in the body - like posture, attorneys, etc. e.g. in cluster id: 7643871 we have the posture and - the opinion text but in the xml file we only have the opinion text, cosine_sim: - 0.1639075094124459 and percent_match: 73) - - Sometimes one algorithm performs better than the other, this is due to some - additional text, such as editor's notes, or the author, page number or posture - added to the opinion - - Key is opinion position from file, Value is opinion position from cl opinion e.g. 
- matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file opinion and - 2 is cl opinion - - :param file_opinions_list: Opinions from file - :param cl_opinions_list: CL opinions - :return: Matches if found or empty dict - """ - - scores = similarity_scores(file_opinions_list, cl_opinions_list) - - matches = {} - for i, row in enumerate(scores): - j = row.argmax() # type: ignore - file_opinion = re.sub( - r"[^a-zA-Z0-9 ]", "", file_opinions_list[i].lower() - ) - cl_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", cl_opinions_list[j].lower()) - - cosine_sim = get_cosine_similarity(file_opinion, cl_opinion) - - percent_match = compare_documents(file_opinion, cl_opinion) - - if cosine_sim < 0.60 and percent_match < 60: - continue - - matches[i] = j - - return matches - - def clean_opinion_content(text: str) -> str: """Clean opinion content @@ -424,9 +42,6 @@ def clean_opinion_content(text: str) -> str: return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower()) -# TODO ------------------------ remove until here ------------------------------- - - def get_opinions_cleaned_content( cluster_id, ) -> tuple[Optional[str], list[dict], int, bool]: @@ -531,6 +146,8 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None: """We assume that harvard data is already ordered, we just need to fill the order field in each opinion + The harvard importer created the opinions in order of appearance in the file + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value :return: None @@ -795,25 +412,23 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - if options["process_harvard"] and options["process_columbia"]: - print( - "You can only select one option process-harvard or process-columbia" + + if not options["process_harvard"] and not options["process_columbia"]: + logger.info( + "One option required: process-harvard or process-columbia" ) return - if not options["process_harvard"] and not options["process_columbia"]: - print("One option required: process-harvard or process-columbia") + if options["process_harvard"] and options["process_columbia"]: + logger.info( + "You can only select one option process-harvard or process-columbia" + ) return if options["process_harvard"]: sort_harvard_opinions(options["start_id"], options["end_id"]) - if options["process_columbia"] and options["xml_dir"]: + if options["process_columbia"]: sort_columbia_opinions( options["start_id"], options["end_id"], options["xml_dir"] ) - - if options["process_columbia"] and not options["xml_dir"]: - print( - "Argument --xml-dir required to read xml files from mounted directory" - ) From 48184509bf03ca11ee2d3a75ff7bbb6ffc5f809f Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 19 Jul 2024 16:12:23 -0400 Subject: [PATCH 066/372] feat(UI): Update state court picker Add territories as its own section in the state court picker Also - remove code that bundles courts and move it to a template filter to simplify things. 
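
A minimal sketch of the grouping behavior this aims for (hypothetical Court
stand-ins; the real filter, group_courts, is in the diff below):

    from dataclasses import dataclass

    @dataclass
    class FakeCourt:
        pk: str
        jurisdiction: str  # "S" = state supreme, "SA" = state appellate

    courts = [
        FakeCourt("cal", "S"), FakeCourt("calctapp", "SA"),
        FakeCourt("colo", "S"), FakeCourt("conn", "S"),
    ]
    # Columns stay contiguous, and each column break is nudged forward so
    # that a column only starts on an "S"/"TS" court:
    # group_courts(courts, 2) -> [[cal, calctapp], [colo, conn]]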
---
 cl/custom_filters/templatetags/extras.py    | 38 ++++++++++++++++++-
 cl/lib/search_utils.py                      | 25 +++---------
 .../includes/jurisdiction_picker_modal.html | 36 +++++++++++-------
 3 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py
index 40d2813cda..2624e81cb6 100644
--- a/cl/custom_filters/templatetags/extras.py
+++ b/cl/custom_filters/templatetags/extras.py
@@ -10,7 +10,7 @@
 from django.utils.safestring import SafeString, mark_safe
 from elasticsearch_dsl import AttrDict, AttrList
 
-from cl.search.models import Docket, DocketEntry
+from cl.search.models import Court, Docket, DocketEntry
 
 register = template.Library()
 
@@ -243,3 +243,39 @@ def get_highlight(result: AttrDict | dict[str, any], field: str) -> any:
         original_value = result.get(field, "")
 
     return render_string_or_list(hl_value) if hl_value else original_value
+
+
+@register.filter
+def group_courts(courts: list[Court], num_columns: int) -> list:
+    """Divide courts into equal groups while keeping related courts together
+
+    :param courts: Courts to group.
+    :param num_columns: Number of groups wanted
+    :return: The courts grouped together
+    """
+
+    column_len = len(courts) // num_columns
+    remainder = len(courts) % num_columns
+
+    groups = []
+    start = 0
+    for index in range(num_columns):
+        # Calculate the end index for this chunk
+        end = start + column_len + (1 if index < remainder else 0)
+
+        # Find the next 'TS' or 'S' starting point
+        while end < len(courts) and courts[end].jurisdiction not in [
+            "TS",
+            "S",
+        ]:
+            end += 1
+
+        # Adjust the chunk to start with 'TS' or 'S'
+        while start < end and courts[start].jurisdiction not in ["TS", "S"]:
+            start += 1
+
+        # Create the column and add it to the result
+        groups.append(courts[start:end])
+        start = end
+
+    return groups
diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py
index 5a3fdb6afb..affb89318e 100644
--- a/cl/lib/search_utils.py
+++ b/cl/lib/search_utils.py
@@ -233,8 +233,8 @@ def merge_form_with_courts(
     }
     bap_bundle = []
     b_bundle = []
-    state_bundle: List = []
-    state_bundles = []
+    states = []
+    territories = []
     for court in courts:
         if court.jurisdiction == Court.FEDERAL_APPELLATE:
             court_tabs["federal"].append(court)
@@ -247,15 +247,9 @@ def merge_form_with_courts(
             else:
                 b_bundle.append(court)
         elif court.jurisdiction in Court.STATE_JURISDICTIONS:
-            # State courts get bundled by supreme courts
-            if court.jurisdiction == Court.STATE_SUPREME:
-                # Whenever we hit a state supreme court, we append the
-                # previous bundle and start a new one.
-                if state_bundle:
-                    state_bundles.append(state_bundle)
-                state_bundle = [court]
-            else:
-                state_bundle.append(court)
+            states.append(court)
+        elif court.jurisdiction in Court.TERRITORY_JURISDICTIONS:
+            territories.append(court)
         elif court.jurisdiction in [
             Court.FEDERAL_SPECIAL,
             Court.COMMITTEE,
@@ -265,18 +259,11 @@ def merge_form_with_courts(
         ]:
             court_tabs["special"].append(court)
 
-    # append the final state bundle after the loop ends. Hack?
-    state_bundles.append(state_bundle)
-
     # Put the bankruptcy bundles in the courts dict
     if bap_bundle:
         court_tabs["bankruptcy_panel"] = [bap_bundle]
     court_tabs["bankruptcy"] = [b_bundle]
-
-    # Divide the state bundles into the correct partitions
-    court_tabs["state"].append(state_bundles[:17])
-    court_tabs["state"].append(state_bundles[17:34])
-    court_tabs["state"].append(state_bundles[34:])
+    court_tabs["state"] = [states, territories]
 
     return court_tabs, court_count_human, court_count
 
diff --git a/cl/search/templates/includes/jurisdiction_picker_modal.html b/cl/search/templates/includes/jurisdiction_picker_modal.html
index 842337c0da..1cf3800812 100644
--- a/cl/search/templates/includes/jurisdiction_picker_modal.html
+++ b/cl/search/templates/includes/jurisdiction_picker_modal.html
@@ -1,4 +1,5 @@
 {% load partition_util %}
+{% load extras %}
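Further down, the modal template can consume the filter directly. A hedged sketch of what that call might look like — the variable name, column count, and markup are assumptions for illustration, not the actual template code:

    {# Split the state courts into three columns; per the filter above, #}
    {# each column starts at a supreme ("S") or territory supreme ("TS") court. #}
    {% for column in state_courts|group_courts:3 %}
      <div class="col-sm-4">
        {% for court in column %}
          <label>
            <input type="checkbox" name="court_{{ court.pk }}"> {{ court.short_name }}
          </label>
        {% endfor %}
      </div>
    {% endfor %}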