diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index c9698ea5..1561ffe0 100755 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -2,7 +2,7 @@ name: deploy on: push: - branches: [main, develop, CU-8686edfh0-performance] + branches: [main, develop, CU-86897a79g-cache-misses] jobs: deploy: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2972d2c3..4720875c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: | sudo apt update - sudo apt install -y --no-install-recommends postgresql-client-14 + sudo apt install -y --no-install-recommends postgresql-client python -m pip install pip-tools pip-sync requirements/base/base.txt requirements/dev/dev.txt requirements/test/test.txt - name: Create NC database diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 814bf39d..ecf8bd74 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,22 @@ repos: - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 24.10.0 hooks: - id: black language_version: python3.10 exclude: migrations - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 7.1.1 hooks: - id: flake8 exclude: settings|migrations|tests - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort args: ["--profile", "black", "--filter-files"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer diff --git a/Dockerfile b/Dockerfile index 66f4e32a..4a46f7fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,19 @@ -FROM node:18.17.0-bullseye-slim as static_files +FROM node:18.17.0-bullseye-slim AS static_files WORKDIR /code -ENV PATH /code/node_modules/.bin:$PATH +ENV PATH=/code/node_modules/.bin:$PATH COPY frontend/package.json frontend/package-lock.json /code/ -RUN npm install -g npm@latest RUN npm install 
--silent COPY frontend/ /code/ RUN npm run build -FROM python:3.10-slim-bullseye as base +FROM python:3.10-slim-bullseye AS base # Create a group and user to run our app ARG APP_USER=appuser RUN groupadd -r ${APP_USER} && useradd --no-log-init -r -g ${APP_USER} ${APP_USER} # Install packages needed to run your application (not build deps): -# mime-support -- for mime types when serving static files # postgresql-client -- for running database commands # We need to recreate the /usr/share/man/man{1..8} directories first because # they were clobbered by a parent image. @@ -127,7 +125,6 @@ RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/ jq \ libpcre3 \ libpq-dev \ - mime-support \ nodejs \ openssh-client \ postgresql-client-12 \ diff --git a/Makefile b/Makefile index d7a9a229..2ffca190 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ update_requirements: pip-compile --output-file=requirements/base/base.txt requirements/base/base.in pip-compile --output-file=requirements/test/test.txt requirements/test/test.in pip-compile --output-file=requirements/dev/dev.txt requirements/dev/dev.in - pip-compile --output-file=requirements/deploy/deploy.txt requirements/deploy/deploy.in + pip-compile --upgrade --output-file=requirements/deploy/deploy.txt requirements/deploy/deploy.in install_requirements: @echo 'Installing pip-tools...' diff --git a/deploy/deploy-cf-stack.yml b/deploy/deploy-cf-stack.yml index 3c293899..05cf8b09 100644 --- a/deploy/deploy-cf-stack.yml +++ b/deploy/deploy-cf-stack.yml @@ -15,3 +15,22 @@ unexpectedly), execute the changeset. When complete, hit RETURN to continue or Control-C, then A to abort. 
when: cloudformation_stack_result is changed + +- name: cdn (cloudfront) management + hosts: cdn + gather_facts: false + tags: cdn + vars: + ansible_connection: local + ansible_python_interpreter: "{{ ansible_playbook_python }}" + roles: + - role: caktus.aws-web-stacks + tasks: + - name: Prompt if CloudFormation stack changed + pause: + prompt: > + A changeset was created for the CloudFormation stack. You MUST login to the AWS console, + inspect the changeset, and if everything looks good (i.e., it's not DELETING anything + unexpectedly), execute the changeset. When complete, hit RETURN to continue or Control-C, + then A to abort. + when: cloudformation_stack_result is changed diff --git a/deploy/group_vars/all.yml b/deploy/group_vars/all.yml index a4257c85..c25908ea 100644 --- a/deploy/group_vars/all.yml +++ b/deploy/group_vars/all.yml @@ -23,41 +23,37 @@ admin_database_password: !vault | # provisioning with aws-web-stacks. # ---------------------------------------------------------------------------- -stack_name: "{{ app_name }}-stack" - -cloudformation_stack: - region: "{{ aws_region }}" - stack_name: "{{ stack_name }}" - template_bucket: "aws-web-stacks-{{ app_name }}" - # Webstacks required variable - template_local_path: '{{ playbook_dir + "/stack/eks-no-nat.yml" }}' - create_changeset: true - termination_protection: true - - template_parameters: - PrimaryAZ: "{{ aws_region }}a" - SecondaryAZ: "{{ aws_region }}b" - DesiredScale: 2 - MaxScale: 4 - UseAES256Encryption: "true" - CustomerManagedCmkArn: "" - ContainerInstanceType: t3a.large - ContainerVolumeSize: 40 - DatabaseAllocatedStorage: 100 - DatabaseClass: db.t3.large - DatabaseEngineVersion: "12" - DatabaseParameterGroupFamily: postgres12 - DatabaseMultiAZ: "false" - DatabaseUser: "{{ app_name }}_admin" - DatabasePassword: "{{ admin_database_password }}" - DatabaseName: "{{ app_name }}" - DomainName: nccopwatch.org - DomainNameAlternates: "" - AssetsCloudFrontDomain: files.nccopwatch.org - 
AssetsCloudFrontCertArn: arn:aws:acm:us-east-1:606178775542:certificate/379950bb-4b29-4308-8418-122674fe1076 - AssetsUseCloudFront: "true" - tags: - Environment: "{{ app_name }}" +cloudformation_stack_state: present +cloudformation_stack_profile: '{{ aws_profile }}' +cloudformation_stack_region: '{{ aws_region }}' +cloudformation_stack_name: "{{ app_name }}-stack" +cloudformation_stack_termination_protection: true +cloudformation_stack_template_bucket: "aws-web-stacks-{{ app_name }}" +cloudformation_stack_template_local_path: '{{ playbook_dir + "/stack/eks-no-nat.yml" }}' +cloudformation_stack_template_parameters: + PrimaryAZ: "{{ aws_region }}a" + SecondaryAZ: "{{ aws_region }}b" + DesiredScale: 2 + MaxScale: 4 + UseAES256Encryption: "true" + CustomerManagedCmkArn: "" + ContainerInstanceType: t4g.large + ContainerVolumeSize: 40 + DatabaseAllocatedStorage: 100 + DatabaseClass: db.t4g.xlarge + DatabaseEngineVersion: "12" + DatabaseParameterGroupFamily: postgres12 + DatabaseMultiAZ: "false" + DatabaseUser: "{{ app_name }}_admin" + DatabasePassword: "{{ admin_database_password }}" + DatabaseName: "{{ app_name }}" + DomainName: nccopwatch.org + DomainNameAlternates: "" + AssetsCloudFrontDomain: files.nccopwatch.org + AssetsCloudFrontCertArn: arn:aws:acm:us-east-1:606178775542:certificate/379950bb-4b29-4308-8418-122674fe1076 + AssetsUseCloudFront: "true" +cloudformation_stack_tags: + Environment: "{{ app_name }}" # Install Descheduler to attempt to spread out pods again after node failures k8s_install_descheduler: yes @@ -79,7 +75,7 @@ k8s_descheduler_release_values: # clusters for web apps. 
# ---------------------------------------------------------------------------- -k8s_cluster_name: "{{ cloudformation_stack.stack_name }}-cluster" +k8s_cluster_name: "{{ cloudformation_stack_name }}-cluster" k8s_context: "arn:aws:eks:us-east-2:606178775542:cluster/{{ k8s_cluster_name }}" k8s_cluster_type: aws diff --git a/deploy/group_vars/cdn.yml b/deploy/group_vars/cdn.yml new file mode 100644 index 00000000..0f27d09c --- /dev/null +++ b/deploy/group_vars/cdn.yml @@ -0,0 +1,23 @@ +cloudfront_domain_name: "{{ k8s_domain_names[0] }}" +cloudfront_domain_name_origin: "{{ env_name }}-origin.nccopwatch.org" + +cloudformation_stack_state: present +cloudformation_stack_profile: '{{ aws_profile }}' +cloudformation_stack_region: '{{ aws_region }}' +cloudformation_stack_name: 'cdn-{{ env_name }}' +cloudformation_stack_template_bucket: 'aws-web-stacks-trafficstops' +cloudformation_stack_template_bucket_path: 'templates/cdn.yml' +cloudformation_stack_template_local_path: '{{ playbook_dir + "/stack/cloudfront.yml" }}' +cloudformation_stack_template_parameters: + AppCloudFrontCertArn: arn:aws:acm:us-east-1:606178775542:certificate/5bcc16cb-7ca3-4277-8992-a4e20500cc0b + AppCloudFrontForwardedHeaders: "Host,Authorization" + DomainName: "{{ cloudfront_domain_name }}" + AppCloudFrontOriginDomainName: "{{ cloudfront_domain_name_origin }}" + # Allow key-less access to create CloudFront invalidations + AppCloudFrontRoleArn: trafficstops-stack-ContainerInstanceRole-1XMBM3VLAYOVE + # Required for the CloudFront template + DomainNameAlternates: "" + CustomAppCertificateArn: "" + CertificateValidationMethod: "(none)" +cloudformation_stack_tags: + Environment: "{{ env_name }}" diff --git a/deploy/group_vars/k8s.yml b/deploy/group_vars/k8s.yml index 25cda4d4..e4a79d4c 100644 --- a/deploy/group_vars/k8s.yml +++ b/deploy/group_vars/k8s.yml @@ -40,7 +40,7 @@ k8s_collectstatic_enabled: false # Auxillary pod configuration: k8s_worker_enabled: true -k8s_worker_replicas: 1 +k8s_worker_replicas: 2 
k8s_worker_celery_app: "traffic_stops" k8s_worker_command: - newrelic-admin @@ -103,6 +103,9 @@ env_email_host_password: !vault | 31326137623163613135346565346632623661303839653038333866363565623865363766326465 3232353563333332396133636565626662366332356638303166 env_email_use_tls: "true" +# Basic auth +env_basicauth_username: "" +env_basicauth_password: "" k8s_environment_variables: CONTAINER_IMAGE_TAG: "{{ k8s_container_image_tag }}" @@ -115,6 +118,9 @@ k8s_environment_variables: ALLOWED_HOSTS: "{{ k8s_domain_names[0] }}" ENVIRONMENT: "{{ env_name }}" CACHE_HOST: "{{ env_cache_host }}" + CACHE_CLOUDFRONT_DISTRIBUTION_ID: "{{ env_cache_cloudfront_distribution_id }}" + CACHE_BASICAUTH_USERNAME: "{{ env_basicauth_username }}" + CACHE_BASICAUTH_PASSWORD: "{{ env_basicauth_password }}" BROKER_URL: "{{ env_broker_url }}" # *** Uploaded media DEFAULT_FILE_STORAGE: "{{ env_default_file_storage }}" diff --git a/deploy/host_vars/production.yml b/deploy/host_vars/production.yml index 91791161..f274c731 100644 --- a/deploy/host_vars/production.yml +++ b/deploy/host_vars/production.yml @@ -11,6 +11,7 @@ k8s_ingress_tls_domains_extra: env_contact_us_emails: - wcarpenter@forwardjustice.org +env_cache_cloudfront_distribution_id: "ERRFDXQVT6ZBM" database_password: !vault | $ANSIBLE_VAULT;1.1;AES256 diff --git a/deploy/host_vars/staging.yml b/deploy/host_vars/staging.yml index ebaa5b71..a069f759 100644 --- a/deploy/host_vars/staging.yml +++ b/deploy/host_vars/staging.yml @@ -7,6 +7,7 @@ env_contact_us_emails: - forwardjustice-team@caktusgroup.com env_media_location: "staging/" +env_cache_cloudfront_distribution_id: E2OFFI0H5HY2N8 database_password: !vault | $ANSIBLE_VAULT;1.1;AES256 @@ -16,6 +17,19 @@ database_password: !vault | 3263343333363130630a373033353533613064653033623138313334623537383037356262383662 36613231353732663637316637383061376566663466373865356539626539376161 +env_basicauth_username: forwardjustice +# Update `k8s_container_htpasswd` below if this password changes. 
I wasn't able to +# manually hash the password and get it to work. +# https://httpd.apache.org/docs/2.4/misc/password_encryptions.html +# https://docs.ansible.com/ansible/latest/collections/ansible/builtin/password_hash_filter.html +# My failed attempt: "{{ env_basicauth_username }}:{SHA}{{ env_basicauth_password | hash('sha1') | b64encode }}" +env_basicauth_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 64363233356237323034303932306231333765383966306639663465323664643065386635626464 + 3463303836643531613363636239646463383936383837380a316463386662656238653439353431 + 66616461386237636538366165613332306538623038343936316366613832343636313433326534 + 3565623766653963620a333937333535376666346165343036623964623037343461316135663230 + 3662 k8s_container_htpasswd: !vault | $ANSIBLE_VAULT;1.1;AES256 31356461656536343532333632356338616462346436386566643438376237333935373531633762 diff --git a/deploy/inventory b/deploy/inventory index 207be69c..1652bfb1 100644 --- a/deploy/inventory +++ b/deploy/inventory @@ -2,5 +2,9 @@ staging production +[cdn] +staging +production + [cluster] aws.amazon.com ansible_connection=local ansible_python_interpreter='/usr/bin/env python3' diff --git a/deploy/requirements.yml b/deploy/requirements.yml index e8c76a4d..f88ba316 100755 --- a/deploy/requirements.yml +++ b/deploy/requirements.yml @@ -6,7 +6,7 @@ - src: https://github.com/caktus/ansible-role-aws-web-stacks name: caktus.aws-web-stacks - version: '' + version: v0.3.0 - src: https://github.com/caktus/ansible-role-k8s-web-cluster name: caktus.k8s-web-cluster diff --git a/deploy/stack/cloudfront.yml b/deploy/stack/cloudfront.yml new file mode 100644 index 00000000..c9da4876 --- /dev/null +++ b/deploy/stack/cloudfront.yml @@ -0,0 +1,250 @@ +# This Cloudformation stack template was generated by +# https://github.com/caktus/aws-web-stacks +# at 2025-01-14 14:14:07.595026 +# with parameters: +# USE_CLOUDFRONT = on + +Conditions: + AppCloudFrontCertArnCondition: !Not + - !Equals + - !Ref 
'AppCloudFrontCertArn' + - '' + AppCloudFrontForwardedHeadersCondition: !Not + - !Equals + - !Join + - '' + - !Ref 'AppCloudFrontForwardedHeaders' + - '' + CertificateCondition: !Or + - !Not + - !Equals + - !Ref 'CustomAppCertificateArn' + - '' + - !Not + - !Equals + - !Ref 'CertificateValidationMethod' + - (none) + CustomAppCertArnCondition: !Not + - !Equals + - !Ref 'CustomAppCertificateArn' + - '' + NoAlternateDomains: !Equals + - !Join + - '' + - !Ref 'DomainNameAlternates' + - '' + StackCertificateCondition: !Not + - !Equals + - !Ref 'CertificateValidationMethod' + - (none) + UsEast1Condition: !Equals + - !Ref 'AWS::Region' + - us-east-1 +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: Global + Parameters: + - DomainName + - DomainNameAlternates + - CertificateValidationMethod + - CustomAppCertificateArn + - Label: + default: Application Server + Parameters: + - AppCloudFrontOriginDomainName + - AppCloudFrontRoleArn + - AppCloudFrontOriginRequestPolicyId + - AppCloudFrontProtocolPolicy + - AppCloudFrontForwardedHeaders + - AppCloudFrontCertArn + ParameterLabels: + AppCloudFrontCertArn: + default: CloudFront SSL Certificate ARN + AppCloudFrontForwardedHeaders: + default: CloudFront Forwarded Headers + AppCloudFrontOriginDomainName: + default: CloudFront Origin Domain Name + AppCloudFrontOriginRequestPolicyId: + default: Origin Request Policy ID + AppCloudFrontProtocolPolicy: + default: CloudFront Protocol Policy + AppCloudFrontRoleArn: + default: CloudFront Role ARN + CertificateValidationMethod: + default: Certificate Validation Method + CustomAppCertificateArn: + default: Custom App Certificate ARN + DomainName: + default: Domain Name + DomainNameAlternates: + default: Alternate Domain Names +Outputs: + AppCloudFrontDomainName: + Description: The app CDN domain name + Value: !GetAtt 'AppCloudFrontDistribution.DomainName' +Parameters: + AppCloudFrontCertArn: + Description: If your stack is NOT in the us-east-1 you must 
manually create an ACM certificate for your application domain in the us-east-1 region and provide its ARN here. + Type: String + AppCloudFrontForwardedHeaders: + Default: '' + Description: >- + The CachePolicy headers that will be forwarded to the origin and used in the cache key. The 'Host' header is required for SSL on an Elastic Load Balancer, but it should NOT be passed to a Lambda Function + URL. + Type: CommaDelimitedList + AppCloudFrontOriginDomainName: + Default: '' + Description: Domain name of the origin server + Type: String + AppCloudFrontOriginRequestPolicyId: + Default: 216adef6-5c7f-47e4-b989-5492eafa07d3 + Description: The unique identifier of the origin request policy to attach to the app cache behavior + Type: String + AppCloudFrontProtocolPolicy: + AllowedValues: + - redirect-to-https + - https-only + - allow-all + Default: redirect-to-https + Description: 'The protocols allowed by the application server''s CloudFront distribution. See: http://docs.aws.amazon.com/cloudfront/latest/APIReference/API_DefaultCacheBehavior.html' + Type: String + AppCloudFrontRoleArn: + Default: '' + Description: ARN of the role to add IAM permissions for invalidating this distribution + Type: String + CertificateValidationMethod: + AllowedValues: + - (none) + - DNS + - Email + Default: DNS + Description: >- + How to validate domain ownership for issuing an SSL certificate - highly recommend DNS. DNS and Email will pause stack creation until you do something to complete the validation. If omitted, an HTTPS + listener can be manually attached to the load balancer after stack creation. + Type: String + CustomAppCertificateArn: + Description: An existing ACM certificate ARN to be used by the application ELB. DNS and Email validation will not work with this option. + Type: String + DomainName: + Description: The fully-qualified domain name for the application. 
+ Type: String + DomainNameAlternates: + Description: A comma-separated list of Alternate FQDNs to be included in the Subject Alternative Name extension of the SSL certificate. + Type: CommaDelimitedList +Resources: + AppCloudFrontCachePolicy: + Properties: + CachePolicyConfig: + DefaultTTL: 86400 + MaxTTL: 31536000 + MinTTL: 0 + Name: !Join + - '-' + - - AppCachePolicy + - !Ref 'AWS::StackName' + ParametersInCacheKeyAndForwardedToOrigin: + CookiesConfig: + CookieBehavior: none + EnableAcceptEncodingBrotli: true + EnableAcceptEncodingGzip: true + HeadersConfig: !If + - AppCloudFrontForwardedHeadersCondition + - HeaderBehavior: whitelist + Headers: !Ref 'AppCloudFrontForwardedHeaders' + - HeaderBehavior: none + QueryStringsConfig: + QueryStringBehavior: all + Type: AWS::CloudFront::CachePolicy + AppCloudFrontDistribution: + Properties: + DistributionConfig: + Aliases: !Split + - ; + - !Join + - '' + - - !Ref 'DomainName' + - !If + - NoAlternateDomains + - '' + - ; + - !Join + - ; + - !Ref 'DomainNameAlternates' + DefaultCacheBehavior: + AllowedMethods: + - DELETE + - GET + - HEAD + - OPTIONS + - PATCH + - POST + - PUT + CachePolicyId: !Ref 'AppCloudFrontCachePolicy' + CachedMethods: + - HEAD + - GET + Compress: true + OriginRequestPolicyId: !Ref 'AppCloudFrontOriginRequestPolicyId' + TargetOriginId: ApplicationServer + ViewerProtocolPolicy: !Ref 'AppCloudFrontProtocolPolicy' + Enabled: true + HttpVersion: http2 + Origins: + - CustomOriginConfig: + OriginProtocolPolicy: https-only + DomainName: !Ref 'AppCloudFrontOriginDomainName' + Id: ApplicationServer + ViewerCertificate: !If + - UsEast1Condition + - AcmCertificateArn: !If + - CustomAppCertArnCondition + - !Ref 'CustomAppCertificateArn' + - !Ref 'Certificate' + MinimumProtocolVersion: TLSv1.2_2021 + SslSupportMethod: sni-only + - !If + - AppCloudFrontCertArnCondition + - AcmCertificateArn: !Ref 'AppCloudFrontCertArn' + MinimumProtocolVersion: TLSv1.2_2021 + SslSupportMethod: sni-only + - !Ref 'AWS::NoValue' + 
Tags: + - Key: aws-web-stacks:stack-name + Value: !Ref 'AWS::StackName' + Type: AWS::CloudFront::Distribution + AppCloudFrontInvalidationPolicy: + Properties: + PolicyDocument: + Statement: + - Action: + - cloudfront:GetDistribution + - cloudfront:GetDistributionConfig + - cloudfront:ListDistributions + - cloudfront:ListCloudFrontOriginAccessIdentities + - cloudfront:CreateInvalidation + - cloudfront:GetInvalidation + - cloudfront:ListInvalidations + Effect: Allow + Resource: '*' + PolicyName: AppCloudFrontInvalidationPolicy + Roles: + - !Ref 'AppCloudFrontRoleArn' + Type: AWS::IAM::Policy + Certificate: + Condition: StackCertificateCondition + Properties: + DomainName: !Ref 'DomainName' + DomainValidationOptions: + - DomainName: !Ref 'DomainName' + ValidationDomain: !Ref 'DomainName' + SubjectAlternativeNames: !If + - NoAlternateDomains + - !Ref 'AWS::NoValue' + - !Ref 'DomainNameAlternates' + Tags: + - Key: aws-web-stacks:stack-name + Value: !Ref 'AWS::StackName' + ValidationMethod: !Ref 'CertificateValidationMethod' + Type: AWS::CertificateManager::Certificate diff --git a/deploy/stack/eks-no-nat.yml b/deploy/stack/eks-no-nat.yml index becdda7b..62792bd8 100644 --- a/deploy/stack/eks-no-nat.yml +++ b/deploy/stack/eks-no-nat.yml @@ -514,6 +514,7 @@ Parameters: - t3.large - t3.xlarge - t3.2xlarge + - t4g.large - t2.nano - t2.micro - t2.small diff --git a/docker-compose.yml b/docker-compose.yml index c690031d..0c7d8b1a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.4' - services: db: environment: diff --git a/docs/data-import.rst b/docs/data-import.rst index 17e6e3bb..6e5c86c3 100644 --- a/docs/data-import.rst +++ b/docs/data-import.rst @@ -25,8 +25,8 @@ To load an existing database dump on S3, run: .. code-block:: bash dropdb traffic_stops_nc && createdb -E UTF-8 traffic_stops_nc - aws s3 cp s3://traffic-stops/traffic_stops_nc.dump . 
- pg_restore -Ox -d traffic_stops_nc traffic_stops_nc.dump + aws s3 cp s3://forwardjustice-trafficstops/trafficstops-staging_database.dump . + pg_restore -Ox -d traffic_stops_nc trafficstops-staging_database.dump Raw NC Data (slower) diff --git a/docs/deploy.rst b/docs/deploy.rst index 41d86ac5..b4808434 100644 --- a/docs/deploy.rst +++ b/docs/deploy.rst @@ -111,3 +111,13 @@ Deploy application 4. Deploy:: inv staging deploy --tag=... + + +CloudFront distributions +------------------------------------------------ + +The application is behind a CloudFront distribution. + +To deploy the distribution, run:: + + ansible-playbook deploy-cf-stack.yml -t cdn -vvvv diff --git a/docs/dev-setup.rst b/docs/dev-setup.rst index fd83b77f..a09c3eae 100755 --- a/docs/dev-setup.rst +++ b/docs/dev-setup.rst @@ -66,7 +66,7 @@ Getting Started (Manual) Run PostgreSQL in Docker:: - docker-compose up -d + docker compose up -d db redis This will create a PostgreSQL server with multiple databases (see ``docker-entrypoint.postgres.sql``). 
diff --git a/nc/data/importer.py b/nc/data/importer.py index 2e86d44e..95dba374 100755 --- a/nc/data/importer.py +++ b/nc/data/importer.py @@ -8,13 +8,11 @@ from pathlib import Path from django.conf import settings -from django.core.cache import cache from django.core.mail import EmailMessage from django.db import connections, transaction from nc.data import copy_nc -from nc.models import Agency, Search, Stop, StopSummary -from nc.prime_cache import run as prime_cache_run +from nc.models import Agency, ContrabandSummary, Search, Stop, StopSummary from tsdata.dataset_facts import compute_dataset_facts from tsdata.sql import drop_constraints_and_indexes from tsdata.utils import call, download_and_unzip_data, line_count, unzip_data @@ -85,9 +83,6 @@ def run(url, destination=None, zip_path=None, min_stop_id=None, max_stop_id=None copy_from(destination, nc_agency_csv) logger.info("NC Data Import Complete") - # Clear the query cache to get rid of NC queries made on old data - cache.clear() - # fix landing page data facts = compute_dataset_facts( Agency, Stop, settings.NC_KEY, Search=Search, override_start_date=override_start_date @@ -95,13 +90,20 @@ def run(url, destination=None, zip_path=None, min_stop_id=None, max_stop_id=None logger.info("NC dataset facts: %r", facts) # update materialized view - logger.info("Updating materialized view") + logger.info("Updating materialized views") StopSummary.refresh() - logger.info("Materialized view updated") + ContrabandSummary.refresh() + logger.info("Materialized views updated") # prime the query cache for large NC agencies if prime_cache: - prime_cache_run() + from nc.tasks import prime_all_endpoints + + prime_all_endpoints.delay( + clear_cache=True, + skip_agencies=False, + skip_officers=True, + ) def truncate_input_data(destination, min_stop_id, max_stop_id): diff --git a/nc/filters.py b/nc/filters.py index 242041fb..146eada6 100755 --- a/nc/filters.py +++ b/nc/filters.py @@ -6,7 +6,7 @@ class 
DriverStopsFilter(filters.FilterSet): agency = filters.ModelChoiceFilter( - queryset=models.Agency.objects.no_cache().all(), + queryset=models.Agency.objects.all(), label="Agency", method="filter_agency", required=True, diff --git a/nc/management/commands/prime_cache.py b/nc/management/commands/prime_cache.py index a479ea64..a07b1c5a 100755 --- a/nc/management/commands/prime_cache.py +++ b/nc/management/commands/prime_cache.py @@ -1,32 +1,25 @@ from django.core.management.base import BaseCommand -from nc import prime_cache +from nc.tasks import prime_all_endpoints class Command(BaseCommand): """Prime cache on production server""" def add_arguments(self, parser): - parser.add_argument( - "--cutoff-duration-secs", - dest="cutoff", - help="Stop priming cache for agencies once it takes less than this", - ) parser.add_argument("--clear-cache", "-c", action="store_true", default=False) parser.add_argument("--skip-agencies", action="store_true", default=False) parser.add_argument("--skip-officers", action="store_true", default=True) + parser.add_argument("--agency-cutoff-count", type=int, default=0) parser.add_argument( - "--officer-cutoff-count", - type=int, - default=None, + "--agency-id", action="append", default=[], type=int, help="Agency ID to prime" ) def handle(self, *args, **options): - cutoff = float(options["cutoff"]) if options["cutoff"] else None - prime_cache.run( - cutoff_duration_secs=cutoff, + prime_all_endpoints.delay( clear_cache=options["clear_cache"], skip_agencies=options["skip_agencies"], skip_officers=options["skip_officers"], - officer_cutoff_count=options["officer_cutoff_count"], + agency_cutoff_count=options["agency_cutoff_count"], + limit_to_agencies=options["agency_id"], ) diff --git a/nc/middleware.py b/nc/middleware.py new file mode 100644 index 00000000..308b62d0 --- /dev/null +++ b/nc/middleware.py @@ -0,0 +1,36 @@ +import json +import logging + +from django.http import HttpRequest, HttpResponse +from django.utils.timezone import now + 
+logger = logging.getLogger(__name__) + + +class RequestLoggingMiddleware: + """Log request and response headers""" + + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request: HttpRequest): + headers = {"_type": "request", "_path": request.get_full_path(), "_now": now().isoformat()} + headers.update(request.headers) + for header in ("wsgi.input", "wsgi.errors", "wsgi.file_wrapper"): + headers.pop(header, None) + print(json.dumps(headers)) + return self.get_response(request) + + +class ResponseLoggingMiddleware: + """Log response headers""" + + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request: HttpRequest): + response: HttpResponse = self.get_response(request) + headers = {"_type": "response", "_path": request.get_full_path(), "_now": now().isoformat()} + headers.update(response.headers) + print(json.dumps(headers)) + return response diff --git a/nc/migrations/0001_initial.py b/nc/migrations/0001_initial.py index 50f04ca3..7cc41e04 100644 --- a/nc/migrations/0001_initial.py +++ b/nc/migrations/0001_initial.py @@ -1,133 +1,214 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import caching.base - from django.db import migrations, models class Migration(migrations.Migration): - dependencies = [ - ] + dependencies = [] operations = [ migrations.CreateModel( - name='Agency', + name="Agency", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, verbose_name='ID', serialize=False)), - ('name', models.CharField(max_length=255)), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, verbose_name="ID", serialize=False + ), + ), + ("name", models.CharField(max_length=255)), ], options={ - 'verbose_name_plural': 'Agencies', + "verbose_name_plural": "Agencies", }, - bases=(caching.base.CachingMixin, models.Model), + bases=(models.Model,), ), migrations.CreateModel( - name='Contraband', + name="Contraband", fields=[ - 
('contraband_id', models.IntegerField(primary_key=True, serialize=False)), - ('ounces', models.FloatField(null=True, default=0)), - ('pounds', models.FloatField(null=True, default=0)), - ('pints', models.FloatField(null=True, default=0)), - ('gallons', models.FloatField(null=True, default=0)), - ('dosages', models.FloatField(null=True, default=0)), - ('grams', models.FloatField(null=True, default=0)), - ('kilos', models.FloatField(null=True, default=0)), - ('money', models.FloatField(null=True, default=0)), - ('weapons', models.FloatField(null=True, default=0)), - ('dollar_amount', models.FloatField(null=True, default=0)), + ("contraband_id", models.IntegerField(primary_key=True, serialize=False)), + ("ounces", models.FloatField(null=True, default=0)), + ("pounds", models.FloatField(null=True, default=0)), + ("pints", models.FloatField(null=True, default=0)), + ("gallons", models.FloatField(null=True, default=0)), + ("dosages", models.FloatField(null=True, default=0)), + ("grams", models.FloatField(null=True, default=0)), + ("kilos", models.FloatField(null=True, default=0)), + ("money", models.FloatField(null=True, default=0)), + ("weapons", models.FloatField(null=True, default=0)), + ("dollar_amount", models.FloatField(null=True, default=0)), ], - bases=(caching.base.CachingMixin, models.Model), + bases=(models.Model,), ), migrations.CreateModel( - name='Person', + name="Person", fields=[ - ('person_id', models.IntegerField(primary_key=True, serialize=False)), - ('type', models.CharField(choices=[('D', 'Driver'), ('P', 'Passenger')], max_length=2)), - ('age', models.PositiveSmallIntegerField()), - ('gender', models.CharField(choices=[('M', 'Male'), ('F', 'Female')], max_length=2)), - ('ethnicity', models.CharField(choices=[('H', 'Hispanic'), ('N', 'Non-Hispanic')], max_length=2)), - ('race', models.CharField(choices=[('A', 'Asian'), ('B', 'Black'), ('I', 'Native American'), ('U', 'Other'), ('W', 'White')], max_length=2)), + ("person_id", 
models.IntegerField(primary_key=True, serialize=False)), + ( + "type", + models.CharField(choices=[("D", "Driver"), ("P", "Passenger")], max_length=2), + ), + ("age", models.PositiveSmallIntegerField()), + ( + "gender", + models.CharField(choices=[("M", "Male"), ("F", "Female")], max_length=2), + ), + ( + "ethnicity", + models.CharField( + choices=[("H", "Hispanic"), ("N", "Non-Hispanic")], max_length=2 + ), + ), + ( + "race", + models.CharField( + choices=[ + ("A", "Asian"), + ("B", "Black"), + ("I", "Native American"), + ("U", "Other"), + ("W", "White"), + ], + max_length=2, + ), + ), ], - bases=(caching.base.CachingMixin, models.Model), + bases=(models.Model,), ), migrations.CreateModel( - name='Search', + name="Search", fields=[ - ('search_id', models.IntegerField(primary_key=True, serialize=False)), - ('type', models.PositiveSmallIntegerField(choices=[(1, 'Consent'), (2, 'Search Warrant'), (3, 'Probable Cause'), (4, 'Search Incident to Arrest'), (5, 'Protective Frisk')])), - ('vehicle_search', models.BooleanField(default=False)), - ('driver_search', models.BooleanField(default=False)), - ('passenger_search', models.BooleanField(default=False)), - ('property_search', models.BooleanField(default=False)), - ('vehicle_siezed', models.BooleanField(default=False)), - ('personal_property_siezed', models.BooleanField(default=False)), - ('other_property_sized', models.BooleanField(default=False)), - ('person', models.ForeignKey(to='nc.Person', on_delete=models.CASCADE)), + ("search_id", models.IntegerField(primary_key=True, serialize=False)), + ( + "type", + models.PositiveSmallIntegerField( + choices=[ + (1, "Consent"), + (2, "Search Warrant"), + (3, "Probable Cause"), + (4, "Search Incident to Arrest"), + (5, "Protective Frisk"), + ] + ), + ), + ("vehicle_search", models.BooleanField(default=False)), + ("driver_search", models.BooleanField(default=False)), + ("passenger_search", models.BooleanField(default=False)), + ("property_search", 
models.BooleanField(default=False)), + ("vehicle_siezed", models.BooleanField(default=False)), + ("personal_property_siezed", models.BooleanField(default=False)), + ("other_property_sized", models.BooleanField(default=False)), + ("person", models.ForeignKey(to="nc.Person", on_delete=models.CASCADE)), ], - bases=(caching.base.CachingMixin, models.Model), + bases=(models.Model,), ), migrations.CreateModel( - name='SearchBasis', + name="SearchBasis", fields=[ - ('search_basis_id', models.IntegerField(primary_key=True, serialize=False)), - ('basis', models.CharField(choices=[('ER', 'Erratic/Suspicious Behavior'), ('OB', 'Observation of Suspected Contraband'), ('OI', 'Other Official Information'), ('SM', 'Suspicious Movement'), ('TIP', 'Informant Tip'), ('WTNS', 'Witness Observation')], max_length=4)), - ('person', models.ForeignKey(to='nc.Person', on_delete=models.CASCADE)), - ('search', models.ForeignKey(to='nc.Search', on_delete=models.CASCADE)), + ("search_basis_id", models.IntegerField(primary_key=True, serialize=False)), + ( + "basis", + models.CharField( + choices=[ + ("ER", "Erratic/Suspicious Behavior"), + ("OB", "Observation of Suspected Contraband"), + ("OI", "Other Official Information"), + ("SM", "Suspicious Movement"), + ("TIP", "Informant Tip"), + ("WTNS", "Witness Observation"), + ], + max_length=4, + ), + ), + ("person", models.ForeignKey(to="nc.Person", on_delete=models.CASCADE)), + ("search", models.ForeignKey(to="nc.Search", on_delete=models.CASCADE)), ], - bases=(caching.base.CachingMixin, models.Model), + bases=(models.Model,), ), migrations.CreateModel( - name='Stop', + name="Stop", fields=[ - ('stop_id', models.PositiveIntegerField(primary_key=True, serialize=False)), - ('agency_description', models.CharField(max_length=100)), - ('date', models.DateTimeField()), - ('purpose', models.PositiveSmallIntegerField(choices=[(1, 'Speed Limit Violation'), (2, 'Stop Light/Sign Violation'), (3, 'Driving While Impaired'), (4, 'Safe Movement Violation'), (5, 
'Vehicle Equipment Violation'), (6, 'Vehicle Regulatory Violation'), (7, 'Seat Belt Violation'), (8, 'Investigation'), (9, 'Other Motor Vehicle Violation'), (10, 'Checkpoint')])), - ('action', models.PositiveSmallIntegerField(choices=[(1, 'Verbal Warning'), (2, 'Written Warning'), (3, 'Citation Issued'), (4, 'On-View Arrest'), (5, 'No Action Taken')])), - ('driver_arrest', models.BooleanField(default=False)), - ('passenger_arrest', models.BooleanField(default=False)), - ('encounter_force', models.BooleanField(default=False)), - ('engage_force', models.BooleanField(default=False)), - ('officer_injury', models.BooleanField(default=False)), - ('driver_injury', models.BooleanField(default=False)), - ('passenger_injury', models.BooleanField(default=False)), - ('officer_id', models.CharField(max_length=15)), - ('stop_location', models.CharField(max_length=15)), - ('stop_city', models.CharField(max_length=20)), - ('agency', models.ForeignKey(null=True, to='nc.Agency', related_name='stops', on_delete=models.CASCADE)), + ("stop_id", models.PositiveIntegerField(primary_key=True, serialize=False)), + ("agency_description", models.CharField(max_length=100)), + ("date", models.DateTimeField()), + ( + "purpose", + models.PositiveSmallIntegerField( + choices=[ + (1, "Speed Limit Violation"), + (2, "Stop Light/Sign Violation"), + (3, "Driving While Impaired"), + (4, "Safe Movement Violation"), + (5, "Vehicle Equipment Violation"), + (6, "Vehicle Regulatory Violation"), + (7, "Seat Belt Violation"), + (8, "Investigation"), + (9, "Other Motor Vehicle Violation"), + (10, "Checkpoint"), + ] + ), + ), + ( + "action", + models.PositiveSmallIntegerField( + choices=[ + (1, "Verbal Warning"), + (2, "Written Warning"), + (3, "Citation Issued"), + (4, "On-View Arrest"), + (5, "No Action Taken"), + ] + ), + ), + ("driver_arrest", models.BooleanField(default=False)), + ("passenger_arrest", models.BooleanField(default=False)), + ("encounter_force", models.BooleanField(default=False)), + 
("engage_force", models.BooleanField(default=False)), + ("officer_injury", models.BooleanField(default=False)), + ("driver_injury", models.BooleanField(default=False)), + ("passenger_injury", models.BooleanField(default=False)), + ("officer_id", models.CharField(max_length=15)), + ("stop_location", models.CharField(max_length=15)), + ("stop_city", models.CharField(max_length=20)), + ( + "agency", + models.ForeignKey( + null=True, to="nc.Agency", related_name="stops", on_delete=models.CASCADE + ), + ), ], - bases=(caching.base.CachingMixin, models.Model), + bases=(models.Model,), ), migrations.AddField( - model_name='searchbasis', - name='stop', - field=models.ForeignKey(to='nc.Stop', on_delete=models.CASCADE), + model_name="searchbasis", + name="stop", + field=models.ForeignKey(to="nc.Stop", on_delete=models.CASCADE), ), migrations.AddField( - model_name='search', - name='stop', - field=models.ForeignKey(to='nc.Stop', on_delete=models.CASCADE), + model_name="search", + name="stop", + field=models.ForeignKey(to="nc.Stop", on_delete=models.CASCADE), ), migrations.AddField( - model_name='person', - name='stop', - field=models.ForeignKey(to='nc.Stop', on_delete=models.CASCADE), + model_name="person", + name="stop", + field=models.ForeignKey(to="nc.Stop", on_delete=models.CASCADE), ), migrations.AddField( - model_name='contraband', - name='person', - field=models.ForeignKey(to='nc.Person', on_delete=models.CASCADE), + model_name="contraband", + name="person", + field=models.ForeignKey(to="nc.Person", on_delete=models.CASCADE), ), migrations.AddField( - model_name='contraband', - name='search', - field=models.ForeignKey(to='nc.Search', on_delete=models.CASCADE), + model_name="contraband", + name="search", + field=models.ForeignKey(to="nc.Search", on_delete=models.CASCADE), ), migrations.AddField( - model_name='contraband', - name='stop', - field=models.ForeignKey(to='nc.Stop', on_delete=models.CASCADE), + model_name="contraband", + name="stop", + 
field=models.ForeignKey(to="nc.Stop", on_delete=models.CASCADE), ), ] diff --git a/nc/models.py b/nc/models.py index 73d24498..0edd641b 100755 --- a/nc/models.py +++ b/nc/models.py @@ -1,5 +1,5 @@ -from caching.base import CachingManager, CachingMixin from django.db import models +from django.db.models.functions import ExtractYear from django.utils.html import format_html from django_pgviews import view as pg @@ -133,7 +133,7 @@ class DriverEthnicity(models.TextChoices): ) -class Stop(CachingMixin, models.Model): +class Stop(models.Model): stop_id = models.PositiveIntegerField(primary_key=True) agency_description = models.CharField(max_length=100) agency = models.ForeignKey("Agency", null=True, related_name="stops", on_delete=models.CASCADE) @@ -151,10 +151,8 @@ class Stop(CachingMixin, models.Model): stop_location = models.CharField(max_length=15) # todo: keys stop_city = models.CharField(max_length=20) - objects = CachingManager() - -class Person(CachingMixin, models.Model): +class Person(models.Model): person_id = models.IntegerField(primary_key=True) stop = models.ForeignKey(Stop, on_delete=models.CASCADE) type = models.CharField(max_length=2, choices=PERSON_TYPE_CHOICES) @@ -163,10 +161,8 @@ class Person(CachingMixin, models.Model): ethnicity = models.CharField(max_length=2, choices=ETHNICITY_CHOICES) race = models.CharField(max_length=2, choices=RACE_CHOICES) - objects = CachingManager() - -class Search(CachingMixin, models.Model): +class Search(models.Model): search_id = models.IntegerField(primary_key=True) stop = models.ForeignKey(Stop, on_delete=models.CASCADE) person = models.ForeignKey(Person, on_delete=models.CASCADE) @@ -179,10 +175,8 @@ class Search(CachingMixin, models.Model): personal_property_siezed = models.BooleanField(default=False) other_property_sized = models.BooleanField(default=False) - objects = CachingManager() - -class Contraband(CachingMixin, models.Model): +class Contraband(models.Model): contraband_id = 
models.IntegerField(primary_key=True) search = models.ForeignKey(Search, on_delete=models.CASCADE) person = models.ForeignKey(Person, on_delete=models.CASCADE) @@ -198,27 +192,21 @@ class Contraband(CachingMixin, models.Model): weapons = models.FloatField(default=0, null=True) dollar_amount = models.FloatField(default=0, null=True) - objects = CachingManager() - -class SearchBasis(CachingMixin, models.Model): +class SearchBasis(models.Model): search_basis_id = models.IntegerField(primary_key=True) search = models.ForeignKey(Search, on_delete=models.CASCADE) person = models.ForeignKey(Person, on_delete=models.CASCADE) stop = models.ForeignKey(Stop, on_delete=models.CASCADE) basis = models.CharField(max_length=4, choices=SEARCH_BASIS_CHOICES) - objects = CachingManager() - -class Agency(CachingMixin, models.Model): +class Agency(models.Model): name = models.CharField(max_length=255) # link to CensusProfile (no cross-database foreign key) census_profile_id = models.CharField(max_length=16, blank=True, default="") last_reported_stop = models.DateField(null=True) - objects = CachingManager() - class Meta(object): verbose_name_plural = "Agencies" @@ -303,6 +291,12 @@ class StopSummary(pg.ReadOnlyMaterializedView): class Meta: managed = False indexes = [ + models.Index(fields=["agency"]), + models.Index(fields=["date"]), + models.Index( + ExtractYear("date").desc(), + name="stopsummary_year_desc_idx", + ), models.Index(fields=["agency", "officer_id", "search_type"]), models.Index(fields=["agency", "date"]), models.Index(fields=["engage_force"]), diff --git a/nc/notebooks/2024-10-cache/.gitignore b/nc/notebooks/2024-10-cache/.gitignore new file mode 100644 index 00000000..f63ccdd7 --- /dev/null +++ b/nc/notebooks/2024-10-cache/.gitignore @@ -0,0 +1 @@ +input/* diff --git a/nc/notebooks/2024-10-cache/cloudfront-debugging.ipynb b/nc/notebooks/2024-10-cache/cloudfront-debugging.ipynb new file mode 100644 index 00000000..aefa3765 --- /dev/null +++ 
b/nc/notebooks/2024-10-cache/cloudfront-debugging.ipynb @@ -0,0 +1,751 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1c7e89ae-fc35-441a-8a33-a2194fc535d7", + "metadata": {}, + "source": [ + "## Invalidate cache" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "id": "effa0419-c5e2-471b-91e3-0211342034c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"Location\": \"https://cloudfront.amazonaws.com/2020-05-31/distribution/E2OFFI0H5HY2N8/invalidation/I3450RH5B2N751UJGK2M1EYRJR\",\n", + " \"Invalidation\": {\n", + " \"Id\": \"I3450RH5B2N751UJGK2M1EYRJR\",\n", + " \"Status\": \"InProgress\",\n", + " \"CreateTime\": \"2024-10-29T14:45:36.877Z\",\n", + " \"InvalidationBatch\": {\n", + " \"Paths\": {\n", + " \"Quantity\": 1,\n", + " \"Items\": [\n", + " \"/*\"\n", + " ]\n", + " },\n", + " \"CallerReference\": \"cli-1730213136-577351\"\n", + " }\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "! aws cloudfront create-invalidation --distribution-id E2OFFI0H5HY2N8 --paths \"/*\"" + ] + }, + { + "cell_type": "markdown", + "id": "b96a79a9-b07d-459c-8145-878db045221e", + "metadata": {}, + "source": [ + "## Prime cache" + ] + }, + { + "cell_type": "code", + "execution_count": 447, + "id": "df113a0e-ad17-4582-8d87-e58058f33e08", + "metadata": {}, + "outputs": [], + "source": [ + "! 
kubectl -n trafficstops-staging exec -it deploy/app -- python manage.py prime_cache --clear-cache --agency-cutoff-count=30000000" + ] + }, + { + "cell_type": "code", + "execution_count": 448, + "id": "588e4485-93e5-475f-99b7-3a32ff283b4f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-10-29 03:00:00,116: INFO/ForkPoolWorker-2] nc.tasks.download_and_import_nc_dataset[762a4b77-c7b9-49e7-ad86-cfa4574c943a]: Triggering automatic NC import\n", + "[2024-10-29 04:38:20,846: INFO/ForkPoolWorker-2] tsdata.tasks.import_dataset[5509fcb7-c006-445d-8ca6-1254647d4c70]: Import complete\n", + "2024-10-29 03:00:00,370 celery.app.trace INFO Task nc.tasks.download_and_import_nc_dataset[762a4b77-c7b9-49e7-ad86-cfa4574c943a] succeeded in 0.2555248634889722s: None\n", + "2024-10-29 13:23:06,222 celery.worker.strategy INFO Task nc.tasks.prime_all_endpoints[08c0be8f-8af9-4767-8e3e-a7c3ef383a18] received\n", + "2024-10-29 13:23:06,233 nc.prime_cache INFO Invalidating CloudFront distribution (settings.CACHE_CLOUDFRONT_DISTRIBUTION_ID='E2OFFI0H5HY2N8')\n", + "2024-10-29 13:23:06,320 botocore.credentials INFO Found credentials from IAM Role: trafficstops-stack-ContainerInstanceRole-1XMBM3VLAYOVE\n", + "[2024-10-29 13:23:07,489: INFO/ForkPoolWorker-2] nc.tasks.prime_groups_cache[None]: Querying agency endpoint groups (by_officer=False, cutoff_count=30000000)\n", + "[2024-10-29 13:23:14,713: INFO/ForkPoolWorker-2] nc.tasks.prime_groups_cache[None]: Queuing 341 agency endpoint groups\n", + "[2024-10-29 13:23:14,717: INFO/ForkPoolWorker-2] nc.tasks.prime_groups_cache[None]: Stopping due to cutoff (endpoint_group.num_stops=13420770, cutoff_count=30000000)\n", + "[2024-10-29 13:23:14,718: INFO/ForkPoolWorker-2] nc.tasks.prime_all_endpoints[08c0be8f-8af9-4767-8e3e-a7c3ef383a18]: Complete\n", + "2024-10-29 13:23:14,720 celery.app.trace INFO Task nc.tasks.prime_all_endpoints[08c0be8f-8af9-4767-8e3e-a7c3ef383a18] succeeded in 
8.489157313480973s: None\n", + "2024-10-29 04:38:21,060 celery.worker.strategy INFO Task tsdata.tasks.compliance_report[7d57f328-6125-4d9e-8289-96294b14f86b] received\n", + "[2024-10-29 04:38:21,063: INFO/ForkPoolWorker-1] tsdata.tasks.compliance_report[7d57f328-6125-4d9e-8289-96294b14f86b]: Generating compliance report\n", + "2024-10-29 04:38:21,100 celery.app.trace INFO Task tsdata.tasks.import_dataset[5509fcb7-c006-445d-8ca6-1254647d4c70] succeeded in 5900.723594412208s: None\n", + "[2024-10-29 04:38:21,547: INFO/ForkPoolWorker-1] tsdata.tasks.compliance_report[7d57f328-6125-4d9e-8289-96294b14f86b]: Updating agency last stop\n", + "2024-10-29 04:46:30,853 celery.app.trace INFO Task tsdata.tasks.compliance_report[7d57f328-6125-4d9e-8289-96294b14f86b] succeeded in 489.7907146802172s: None\n", + "2024-10-29 13:23:14,720 celery.worker.strategy INFO Task nc.tasks.prime_group_cache[1aadffd4-ce36-436d-ab4d-6a3e7d397a03] received\n", + "2024-10-29 13:23:14,724 nc.prime_cache INFO Priming cache (agency_id=-1, officer_id=None, num_stops=30815284)...\n", + "2024-10-29 13:23:14,795 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/stops/\n", + "2024-10-29 13:23:21,852 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/stops_by_reason/\n", + "2024-10-29 13:23:30,895 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/searches/\n", + "2024-10-29 13:23:32,876 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/searches_by_type/\n", + "2024-10-29 13:23:34,860 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/use_of_force/\n", + "2024-10-29 13:23:37,189 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/stops-by-percentage/\n", + "2024-10-29 13:23:48,808 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/stops-by-count/\n", + "2024-10-29 13:23:55,041 nc.prime_cache DEBUG Querying 
https://staging.nccopwatch.org/api/agency/-1/stop-purpose-groups/\n", + "2024-10-29 13:24:02,849 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/stops-grouped-by-purpose/\n", + "2024-10-29 13:24:10,603 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/contraband/\n", + "2024-10-29 13:24:17,129 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/searches-by-percentage/\n", + "2024-10-29 13:24:25,106 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/searches-by-count/\n", + "2024-10-29 13:24:28,045 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/search-rate/\n", + "2024-10-29 13:24:37,904 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/contraband-stop-purpose/\n", + "2024-10-29 13:24:52,808 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/contraband-grouped-stop-purpose/\n", + "2024-10-29 13:25:03,642 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/contraband-grouped-stop-purpose/modal/\n", + "2024-10-29 13:25:03,997 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/use-of-force/\n", + "2024-10-29 13:25:07,257 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-stops/\n", + "2024-10-29 13:25:16,574 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-searches/\n", + "2024-10-29 13:25:26,919 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-stops-driver-arrested/\n", + "2024-10-29 13:25:37,225 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-stops-by-purpose-group/\n", + "2024-10-29 13:25:41,654 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-stops-per-stop-purpose/\n", + "2024-10-29 13:25:45,990 nc.prime_cache DEBUG Querying 
https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-searches-by-purpose-group/\n", + "2024-10-29 13:25:55,940 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-searches-per-stop-purpose/\n", + "2024-10-29 13:26:04,879 nc.prime_cache DEBUG Querying https://staging.nccopwatch.org/api/agency/-1/arrests-percentage-of-stops-per-contraband-type/\n", + "2024-10-29 13:26:31,134 nc.prime_cache INFO Primed cache (agency_id=-1, officer_id=None, num_stops=30815284)\n", + "2024-10-29 13:26:31,138 celery.app.trace INFO Task nc.tasks.prime_group_cache[1aadffd4-ce36-436d-ab4d-6a3e7d397a03] succeeded in 196.41544181667268s: (-1, None)\n", + "^C\n" + ] + } + ], + "source": [ + "! kubectl -n trafficstops-staging logs -lapp=celery-worker -f" + ] + }, + { + "cell_type": "markdown", + "id": "76546ae7-3308-4b3a-9b32-987eeae25640", + "metadata": {}, + "source": [ + "## Query staging site" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "id": "f56ff017-ce39-4b7f-9364-e99bed8470e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HTTP/2 200 \n", + "\u001b[1mcontent-type\u001b[0m: application/json\n", + "\u001b[1mcontent-length\u001b[0m: 2406\n", + "\u001b[1mdate\u001b[0m: Tue, 29 Oct 2024 14:46:24 GMT\n", + "\u001b[1mallow\u001b[0m: GET, HEAD, OPTIONS\n", + "\u001b[1mx-frame-options\u001b[0m: DENY\n", + "\u001b[1mcontent-language\u001b[0m: en\n", + "\u001b[1mx-content-type-options\u001b[0m: nosniff\n", + "\u001b[1mx-xss-protection\u001b[0m: 1; mode=block\n", + "\u001b[1mreferrer-policy\u001b[0m: same-origin\n", + "\u001b[1mstrict-transport-security\u001b[0m: max-age=15724800; includeSubDomains\n", + "\u001b[1mvary\u001b[0m: Accept-Encoding\n", + "\u001b[1mx-cache\u001b[0m: Miss from cloudfront\n", + "\u001b[1mvia\u001b[0m: 1.1 6ffeffd65f1c367ca03a4f9f7ea03546.cloudfront.net (CloudFront)\n", + "\u001b[1mx-amz-cf-pop\u001b[0m: ATL59-P1\n", + 
"\u001b[1mx-amz-cf-id\u001b[0m: muh4RyrYuqpHDVISpNJvF6D04Q6iFtmCGcX-6nHEqXNYDw1qCplbrA==\n", + "\n" + ] + } + ], + "source": [ + "! curl --user \"forwardjustice:trafficstops\" --head https://staging.nccopwatch.org/api/agency/80/stops/" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "id": "db81f718-44da-4986-9f99-505a3a369a7b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HTTP/2 200 \n", + "\u001b[1mcontent-type\u001b[0m: application/json\n", + "\u001b[1mcontent-length\u001b[0m: 2406\n", + "\u001b[1mdate\u001b[0m: Tue, 29 Oct 2024 14:46:24 GMT\n", + "\u001b[1mallow\u001b[0m: GET, HEAD, OPTIONS\n", + "\u001b[1mx-frame-options\u001b[0m: DENY\n", + "\u001b[1mcontent-language\u001b[0m: en\n", + "\u001b[1mx-content-type-options\u001b[0m: nosniff\n", + "\u001b[1mx-xss-protection\u001b[0m: 1; mode=block\n", + "\u001b[1mreferrer-policy\u001b[0m: same-origin\n", + "\u001b[1mstrict-transport-security\u001b[0m: max-age=15724800; includeSubDomains\n", + "\u001b[1mvary\u001b[0m: Accept-Encoding\n", + "\u001b[1mx-cache\u001b[0m: Hit from cloudfront\n", + "\u001b[1mvia\u001b[0m: 1.1 2a89debb44c0cd298f9ed7c7b3157cfa.cloudfront.net (CloudFront)\n", + "\u001b[1mx-amz-cf-pop\u001b[0m: ATL59-P1\n", + "\u001b[1mx-amz-cf-id\u001b[0m: _re2TbcWlMZGsHR5c0fkR24hAi6Fe1JDNEIy8lf3emzIhsW9d1bwbw==\n", + "\u001b[1mage\u001b[0m: 4\n", + "\n" + ] + } + ], + "source": [ + "! 
curl --user \"forwardjustice:trafficstops\" --head https://staging.nccopwatch.org/api/agency/80/stops/" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "id": "c3ecdbd4-0342-4325-a379-a4b15b73cc06", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{\"year\":2001,\"asian\":0,\"black\":1,\"native_american\":0,\"other\":0,\"white\":0,\"hispanic\":0},{\"year\":2002,\"asian\":251,\"black\":8128,\"native_american\":21,\"other\":303,\"white\":4414,\"hispanic\":1960},{\"year\":2003,\"asian\":204,\"black\":8323,\"native_american\":11,\"other\":139,\"white\":4581,\"hispanic\":2108},{\"year\":2004,\"asian\":215,\"black\":6739,\"native_american\":6,\"other\":168,\"white\":4526,\"hispanic\":1644},{\"year\":2005,\"asian\":163,\"black\":5626,\"native_american\":7,\"other\":149,\"white\":3353,\"hispanic\":1377},{\"year\":2006,\"asian\":178,\"black\":7186,\"native_american\":40,\"other\":94,\"white\":3850,\"hispanic\":1626},{\"year\":2007,\"asian\":261,\"black\":10330,\"native_american\":70,\"other\":75,\"white\":5324,\"hispanic\":2288},{\"year\":2008,\"asian\":382,\"black\":16394,\"native_american\":90,\"other\":98,\"white\":7221,\"hispanic\":3199},{\"year\":2009,\"asian\":213,\"black\":8882,\"native_american\":60,\"other\":38,\"white\":4287,\"hispanic\":1517},{\"year\":2010,\"asian\":444,\"black\":15929,\"native_american\":127,\"other\":69,\"white\":7234,\"hispanic\":3755},{\"year\":2011,\"asian\":511,\"black\":15762,\"native_american\":156,\"other\":104,\"white\":7451,\"hispanic\":3382},{\"year\":2012,\"asian\":387,\"black\":13970,\"native_american\":102,\"other\":90,\"white\":5895,\"hispanic\":3203},{\"year\":2013,\"asian\":462,\"black\":15280,\"native_american\":192,\"other\":96,\"white\":6641,\"hispanic\":2959},{\"year\":2014,\"asian\":378,\"black\":12841,\"native_american\":143,\"other\":84,\"white\":5982,\"hispanic\":2512},{\"year\":2015,\"asian\":359,\"black\":12258,\"native_american\":141,\"other\":101,\"whi
te\":5674,\"hispanic\":2203},{\"year\":2016,\"asian\":259,\"black\":8534,\"native_american\":119,\"other\":46,\"white\":4183,\"hispanic\":1613},{\"year\":2017,\"asian\":236,\"black\":6703,\"native_american\":55,\"other\":51,\"white\":3320,\"hispanic\":1200},{\"year\":2018,\"asian\":280,\"black\":7716,\"native_american\":59,\"other\":69,\"white\":3266,\"hispanic\":1297},{\"year\":2019,\"asian\":274,\"black\":9232,\"native_american\":64,\"other\":79,\"white\":3708,\"hispanic\":1454},{\"year\":2020,\"asian\":204,\"black\":6597,\"native_american\":41,\"other\":34,\"white\":2660,\"hispanic\":1288},{\"year\":2021,\"asian\":250,\"black\":7063,\"native_american\":32,\"other\":36,\"white\":2895,\"hispanic\":1405},{\"year\":2022,\"asian\":218,\"black\":6413,\"native_american\":30,\"other\":41,\"white\":1929,\"hispanic\":1401},{\"year\":2023,\"asian\":433,\"black\":8672,\"native_american\":82,\"other\":34,\"white\":3319,\"hispanic\":2214},{\"year\":2024,\"asian\":246,\"black\":3310,\"native_american\":17,\"other\":27,\"white\":1538,\"hispanic\":983}]" + ] + } + ], + "source": [ + "! 
curl --user \"forwardjustice:trafficstops\" https://staging.nccopwatch.org/api/agency/80/stops/" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "id": "a1107a09-2eea-4185-88c8-634133c6f7a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{\"year\":2001,\"asian\":0,\"black\":1,\"native_american\":0,\"other\":0,\"white\":0,\"hispanic\":0},{\"year\":2002,\"asian\":251,\"black\":8128,\"native_american\":21,\"other\":303,\"white\":4414,\"hispanic\":1960},{\"year\":2003,\"asian\":204,\"black\":8323,\"native_american\":11,\"other\":139,\"white\":4581,\"hispanic\":2108},{\"year\":2004,\"asian\":215,\"black\":6739,\"native_american\":6,\"other\":168,\"white\":4526,\"hispanic\":1644},{\"year\":2005,\"asian\":163,\"black\":5626,\"native_american\":7,\"other\":149,\"white\":3353,\"hispanic\":1377},{\"year\":2006,\"asian\":178,\"black\":7186,\"native_american\":40,\"other\":94,\"white\":3850,\"hispanic\":1626},{\"year\":2007,\"asian\":261,\"black\":10330,\"native_american\":70,\"other\":75,\"white\":5324,\"hispanic\":2288},{\"year\":2008,\"asian\":382,\"black\":16394,\"native_american\":90,\"other\":98,\"white\":7221,\"hispanic\":3199},{\"year\":2009,\"asian\":213,\"black\":8882,\"native_american\":60,\"other\":38,\"white\":4287,\"hispanic\":1517},{\"year\":2010,\"asian\":444,\"black\":15929,\"native_american\":127,\"other\":69,\"white\":7234,\"hispanic\":3755},{\"year\":2011,\"asian\":511,\"black\":15762,\"native_american\":156,\"other\":104,\"white\":7451,\"hispanic\":3382},{\"year\":2012,\"asian\":387,\"black\":13970,\"native_american\":102,\"other\":90,\"white\":5895,\"hispanic\":3203},{\"year\":2013,\"asian\":462,\"black\":15280,\"native_american\":192,\"other\":96,\"white\":6641,\"hispanic\":2959},{\"year\":2014,\"asian\":378,\"black\":12841,\"native_american\":143,\"other\":84,\"white\":5982,\"hispanic\":2512},{\"year\":2015,\"asian\":359,\"black\":12258,\"native_american\":141,\"other\":101,\"white\":56
74,\"hispanic\":2203},{\"year\":2016,\"asian\":259,\"black\":8534,\"native_american\":119,\"other\":46,\"white\":4183,\"hispanic\":1613},{\"year\":2017,\"asian\":236,\"black\":6703,\"native_american\":55,\"other\":51,\"white\":3320,\"hispanic\":1200},{\"year\":2018,\"asian\":280,\"black\":7716,\"native_american\":59,\"other\":69,\"white\":3266,\"hispanic\":1297},{\"year\":2019,\"asian\":274,\"black\":9232,\"native_american\":64,\"other\":79,\"white\":3708,\"hispanic\":1454},{\"year\":2020,\"asian\":204,\"black\":6597,\"native_american\":41,\"other\":34,\"white\":2660,\"hispanic\":1288},{\"year\":2021,\"asian\":250,\"black\":7063,\"native_american\":32,\"other\":36,\"white\":2895,\"hispanic\":1405},{\"year\":2022,\"asian\":218,\"black\":6413,\"native_american\":30,\"other\":41,\"white\":1929,\"hispanic\":1401},{\"year\":2023,\"asian\":433,\"black\":8672,\"native_american\":82,\"other\":34,\"white\":3319,\"hispanic\":2214},{\"year\":2024,\"asian\":246,\"black\":3310,\"native_american\":17,\"other\":27,\"white\":1538,\"hispanic\":983}]" + ] + } + ], + "source": [ + "! curl --user \"forwardjustice:trafficstops\" https://staging.nccopwatch.org/api/agency/80/stops/" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "4eebc390-c324-4447-851d-11193bcfe856", + "metadata": {}, + "outputs": [], + "source": [ + "! 
open https://staging.nccopwatch.org/agencies/80/traffic-stops" + ] + }, + { + "cell_type": "markdown", + "id": "9f9b158f-2854-4934-9279-ad7e6a989c25", + "metadata": {}, + "source": [ + "## Query logs" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f9f98c7b-0ce4-4ab5-b921-eca3df21e66c", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "import requests\n", + "from rich import print" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "6ee89535-1716-4945-94d9-c01104c2e3f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "98" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = \"\"\"\n", + "{\n", + " actor {\n", + " account(id: 3833993) {\n", + " nrql(query: \"SELECT `message` FROM Log WHERE `namespace_name` = 'trafficstops-staging' AND `container_name` = 'app' SINCE 10 minutes ago LIMIT MAX\") {\n", + " results\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\"\"\"\n", + "\n", + "def get_newrelic_logs():\n", + " endpoint = \"https://api.newrelic.com/graphql\"\n", + " headers = {'API-Key': os.getenv(\"NEW_RELIC_USER_KEY\")}\n", + " response = requests.post(endpoint, headers=headers, json={\"query\": query})\n", + " response.raise_for_status()\n", + "\n", + " if response.status_code == 200:\n", + " data = json.loads(response.content)\n", + " data = [row[\"message\"] for row in data[\"data\"][\"actor\"][\"account\"][\"nrql\"][\"results\"]]\n", + " data = [row for row in data if \"_path\" in row]\n", + " data = [row.split(\"stderr F\")[1] for row in data]\n", + " return data\n", + "\n", + "text = \",\".join(get_newrelic_logs())\n", + "text = f\"[{text}]\"\n", + "pod_requests = json.loads(text)\n", + "len(pod_requests)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e3a5ace6-17bb-4ca8-9e35-722c4568fa92", + "metadata": {}, 
+ "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 98 entries, 97 to 0\n", + "Data columns (total 44 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 _type 98 non-null object \n", + " 1 _path 98 non-null object \n", + " 2 _now 98 non-null datetime64[ns, UTC]\n", + " 3 Content-Type 49 non-null object \n", + " 4 Vary 37 non-null object \n", + " 5 Allow 37 non-null object \n", + " 6 Host 49 non-null object \n", + " 7 X-Request-Id 49 non-null object \n", + " 8 X-Real-Ip 49 non-null object \n", + " 9 X-Forwarded-For 49 non-null object \n", + " 10 X-Forwarded-Host 49 non-null object \n", + " 11 X-Forwarded-Port 49 non-null object \n", + " 12 X-Forwarded-Proto 49 non-null object \n", + " 13 X-Forwarded-Scheme 49 non-null object \n", + " 14 X-Scheme 49 non-null object \n", + " 15 X-Original-Forwarded-For 49 non-null object \n", + " 16 User-Agent 49 non-null object \n", + " 17 X-Amz-Cf-Id 49 non-null object \n", + " 18 Cookie 11 non-null object \n", + " 19 Accept-Language 12 non-null object \n", + " 20 Accept 37 non-null object \n", + " 21 Referer 10 non-null object \n", + " 22 Via 49 non-null object \n", + " 23 Accept-Encoding 49 non-null object \n", + " 24 X-Newrelic-Id 12 non-null object \n", + " 25 Sec-Ch-Ua-Platform 14 non-null object \n", + " 26 Sec-Ch-Ua 14 non-null object \n", + " 27 Sec-Ch-Ua-Mobile 14 non-null object \n", + " 28 Newrelic 12 non-null object \n", + " 29 Traceparent 12 non-null object \n", + " 30 Tracestate 12 non-null object \n", + " 31 Sec-Fetch-Site 22 non-null object \n", + " 32 Sec-Fetch-Mode 22 non-null object \n", + " 33 Sec-Fetch-Dest 22 non-null object \n", + " 34 Priority 17 non-null object \n", + " 35 Cloudfront-Viewer-Country 37 non-null object \n", + " 36 Expires 12 non-null object \n", + " 37 Cache-Control 15 non-null object \n", + " 38 Upgrade-Insecure-Requests 5 non-null object \n", + " 39 Sec-Fetch-User 5 non-null object \n", + " 40 
X-Newrelic-Synthetics-V2 11 non-null object \n", + " 41 X-Abuse-Info 11 non-null object \n", + " 42 X-Newrelic-Synthetics 11 non-null object \n", + " 43 Dnt 11 non-null object \n", + "dtypes: datetime64[ns, UTC](1), object(43)\n", + "memory usage: 34.5+ KB\n" + ] + } + ], + "source": [ + "pod_logs = pd.DataFrame(pod_requests)\n", + "pod_logs[\"_now\"] = pd.to_datetime(pod_logs[\"_now\"])\n", + "pod_logs.sort_values(\"_now\", inplace=True)\n", + "pod_logs.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f8990eb1-dd49-4458-a44b-c3268c79e42f", + "metadata": {}, + "outputs": [], + "source": [ + "# pod_my_ip = pod_logs[\"X-Original-Forwarded-For\"] == \"\"\n", + "# pod_durham_stops = pod_logs[\"_path\"] == \"/api/agency/80/stops/\"\n", + "\n", + "# with pd.option_context('display.max_rows', 250, 'display.max_columns', 45, \"display.max_colwidth\", None):\n", + "# display(pod_logs[pod_durham_stops])" + ] + }, + { + "cell_type": "markdown", + "id": "3157ac61-9af6-4987-bab7-9fff53227028", + "metadata": {}, + "source": [ + "## Download CloudFront logs" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1d2f5f60-9b83-46ff-b5a5-1ef59db50604", + "metadata": {}, + "outputs": [], + "source": [ + "import boto3\n", + "import datetime as dt\n", + "\n", + "from IPython.display import display\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "6881eb13-b715-4543-9dbd-44236003fc6e", + "metadata": {}, + "outputs": [], + "source": [ + "! mkdir -p input/" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "bb0d2842-1c09-4af9-a372-c756d131a807", + "metadata": {}, + "outputs": [], + "source": [ + "s3 = boto3.resource('s3')\n", + "bucket = s3.Bucket('trafficstops-stack-privateassetsbucket-qwb42tui611z')" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f09b2007-3b12-415d-b031-d0bb948982cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Downloading E2OFFI0H5HY2N8.2024-10-29-19.04227993.gz\n",
+       "
\n" + ], + "text/plain": [ + "Downloading E2OFFI0H5HY2N8.\u001b[1;36m2024\u001b[0m-\u001b[1;36m10\u001b[0m-\u001b[1;36m29\u001b[0m-\u001b[1;36m19.04227993\u001b[0m.gz\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Download CloudFront logs\n", + "\n", + "now = dt.datetime.utcnow()\n", + "past_few_hours = dt.timedelta(hours=1)\n", + "\n", + "objects = bucket.objects.filter(Prefix='E2OFFI0H5HY2N8.2024-10-29')\n", + "for obj in objects:\n", + " date = dt.datetime.strptime(obj.key.split(\".\")[1], '%Y-%m-%d-%H')\n", + " if date > (now - past_few_hours):\n", + " local_path = (Path(\"input/\") / Path(obj.key).stem)\n", + " if not local_path.exists():\n", + " print(f\"Downloading {obj.key}\")\n", + " obj.Object().download_file(str(local_path) + \".gz\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "3dffdea2-b060-4888-8190-c68b6cbc2d89", + "metadata": {}, + "outputs": [], + "source": [ + "! gunzip input/*.gz" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "ca713045-a4ed-418d-8787-7c2119709841", + "metadata": {}, + "outputs": [], + "source": [ + "columns = [\n", + " 'date',\n", + " 'time',\n", + " 'x-edge-location',\n", + " 'sc-bytes',\n", + " 'c-ip',\n", + " 'cs-method',\n", + " 'cs(Host)',\n", + " 'cs-uri-stem',\n", + " 'sc-status',\n", + " 'cs(Referer)',\n", + " 'cs(User-Agent)',\n", + " 'cs-uri-query',\n", + " 'cs(Cookie)',\n", + " 'x-edge-result-type',\n", + " 'x-edge-request-id',\n", + " 'x-host-header',\n", + " 'cs-protocol',\n", + " 'cs-bytes',\n", + " 'time-taken',\n", + " 'x-forwarded-for',\n", + " 'ssl-protocol',\n", + " 'ssl-cipher',\n", + " 'x-edge-response-result-type',\n", + " 'cs-protocol-version',\n", + " 'fle-status',\n", + " 'fle-encrypted-fields',\n", + " 'c-port',\n", + " 'time-to-first-byte',\n", + " 'x-edge-detailed-result-type',\n", + " 'sc-content-type',\n", + " 'sc-content-len',\n", + " 'sc-range-start',\n", + " 'sc-range-end'\n", + "]" + ] + }, + { + 
"cell_type": "code", + "execution_count": 34, + "id": "5e815334-4771-45f0-ae49-f646a76c0be4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 553 entries, 371 to 442\n", + "Data columns (total 33 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 553 non-null datetime64[ns]\n", + " 1 x-edge-location 553 non-null object \n", + " 2 sc-bytes 553 non-null int64 \n", + " 3 c-ip 553 non-null object \n", + " 4 cs-method 553 non-null object \n", + " 5 cs(Host) 553 non-null object \n", + " 6 cs-uri-stem 553 non-null object \n", + " 7 sc-status 553 non-null int64 \n", + " 8 cs(Referer) 553 non-null object \n", + " 9 cs(User-Agent) 553 non-null object \n", + " 10 cs-uri-query 553 non-null object \n", + " 11 cs(Cookie) 553 non-null object \n", + " 12 x-edge-result-type 553 non-null object \n", + " 13 x-edge-request-id 553 non-null object \n", + " 14 x-host-header 553 non-null object \n", + " 15 cs-protocol 553 non-null object \n", + " 16 cs-bytes 553 non-null int64 \n", + " 17 time-taken 553 non-null float64 \n", + " 18 x-forwarded-for 553 non-null object \n", + " 19 ssl-protocol 553 non-null object \n", + " 20 ssl-cipher 553 non-null object \n", + " 21 x-edge-response-result-type 553 non-null object \n", + " 22 cs-protocol-version 553 non-null object \n", + " 23 fle-status 553 non-null object \n", + " 24 fle-encrypted-fields 553 non-null object \n", + " 25 c-port 553 non-null int64 \n", + " 26 time-to-first-byte 553 non-null float64 \n", + " 27 x-edge-detailed-result-type 553 non-null object \n", + " 28 sc-content-type 553 non-null object \n", + " 29 sc-content-len 553 non-null object \n", + " 30 sc-range-start 553 non-null object \n", + " 31 sc-range-end 553 non-null object \n", + " 32 X-Request-Id 553 non-null object \n", + "dtypes: datetime64[ns](1), float64(2), int64(4), object(26)\n", + "memory usage: 146.9+ KB\n" + ] + } + ], + "source": [ + "dfs = 
[]\n", + "\n", + "for path in sorted(Path(\"input/\").glob(\"E2OFFI0H5HY2N8*\")):\n", + " dfs.append(\n", + " pd.read_csv(\n", + " path,\n", + " delimiter='\\t',\n", + " skiprows=2,\n", + " names=columns,\n", + " )\n", + " )\n", + "\n", + "cf_logs = pd.concat(dfs, ignore_index=True)\n", + "cf_logs[\"date\"] = pd.to_datetime(cf_logs['date'] + ' ' + cf_logs['time'])\n", + "# cf_logs[\"sc-content-len\"] = cf_logs[\"sc-content-len\"].replace('-', pd.NA).astype(\"Int64\")\n", + "cf_logs[\"X-Request-Id\"] = cf_logs[\"x-edge-request-id\"]\n", + "cf_logs.drop(columns=[\"time\"], inplace=True)\n", + "cf_logs.sort_values(\"date\", inplace=True)\n", + "cf_logs.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "483b4d84-2f56-421e-9218-afc125fb6d8c", + "metadata": {}, + "outputs": [], + "source": [ + "# my_ip = cf_logs[\"c-ip\"] == \"\"\n", + "# durham_stops = cf_logs[\"cs-uri-stem\"] == \"/api/agency/80/stops/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "4eea0e3a-3db1-4798-a4ec-a95d6aae6e9e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "datetime.datetime(2024, 10, 29, 19, 56, 51, 396469)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.datetime.utcnow()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "0cd6e5a0-c99d-4e3e-8fe1-dc6805b759f5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datex-edge-locationsc-bytesc-ipcs-methodcs(Host)cs-uri-stemsc-statuscs(Referer)cs(User-Agent)cs-uri-querycs(Cookie)x-edge-result-typex-edge-request-idx-host-headercs-protocolcs-bytestime-takenx-forwarded-forssl-protocolssl-cipherx-edge-response-result-typecs-protocol-versionfle-statusfle-encrypted-fieldsc-porttime-to-first-bytex-edge-detailed-result-typesc-content-typesc-content-lensc-range-startsc-range-endX-Request-Id
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [date, x-edge-location, sc-bytes, c-ip, cs-method, cs(Host), cs-uri-stem, sc-status, cs(Referer), cs(User-Agent), cs-uri-query, cs(Cookie), x-edge-result-type, x-edge-request-id, x-host-header, cs-protocol, cs-bytes, time-taken, x-forwarded-for, ssl-protocol, ssl-cipher, x-edge-response-result-type, cs-protocol-version, fle-status, fle-encrypted-fields, c-port, time-to-first-byte, x-edge-detailed-result-type, sc-content-type, sc-content-len, sc-range-start, sc-range-end, X-Request-Id]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with pd.option_context('display.max_rows', 250, 'display.max_columns', 40, \"display.max_colwidth\", None):\n", + " display(cf_logs[my_ip & durham_stops])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nc/notebooks/requirements.txt b/nc/notebooks/requirements.txt index 78aead6d..407e1321 100644 --- a/nc/notebooks/requirements.txt +++ b/nc/notebooks/requirements.txt @@ -5,4 +5,4 @@ jupyter-dash==0.4.2 jupyter_server>=2.0.0 dash==2.8.1 plotly==5.13.0 -psycopg2==2.9.3 +psycopg2==2.9.9 diff --git a/nc/prime_cache.py b/nc/prime_cache.py index fc124a33..3d9b941c 100755 --- a/nc/prime_cache.py +++ b/nc/prime_cache.py @@ -1,17 +1,21 @@ import logging +import time -from time import perf_counter +from contextlib import contextmanager +from typing import Generator + +import boto3 +import httpx from django.conf import settings -from django.core.cache import cache -from django.db.models import Count, F, Sum -from django.test.client 
import Client +from django.db.models import F, Q, Sum from django.urls import reverse -from nc.models import Stop, StopSummary +from nc.models import StopSummary logger = logging.getLogger(__name__) API_ENDPOINT_NAMES = ( + "nc:year-range", "nc:agency-api-stops", "nc:agency-api-stops-by-reason", "nc:agency-api-searches", @@ -21,10 +25,11 @@ "nc:stops-by-count", "nc:stop-purpose-groups", "nc:stops-grouped-by-purpose", - "nc:contraband-percentages", "nc:searches-by-percentage", "nc:searches-by-count", "nc:search-rate", + "nc:contraband-percentages", + "nc:contraband-type-percentages", "nc:contraband-percentages-stop-purpose-groups", "nc:contraband-percentages-grouped-stop-purpose", "nc:contraband-percentages-grouped-stop-purpose-modal", @@ -38,202 +43,178 @@ "nc:arrests-percentage-of-searches-per-stop-purpose", "nc:arrests-percentage-of-stops-per-contraband-type", ) -DEFAULT_CUTOFF_SECS = 4 - - -def avoid_newrelic_bug(): - """ - New Relic middleware throws an exception when a web request is run in a - Celery task (without going through HTTP). - - An AttributeError is thrown here: - - https://github.com/edmorley/newrelic-python-agent/blob/v2.82.0.62/newrelic/ - newrelic/hooks/framework_django.py#L93 - - AttributeError: 'BackgroundTask' object has no attribute 'rum_header_generated' - - By disabling the browser_monitoring setting checked for just before the - AttributeError, this New Relic gets out of the way before the problem. - - In production, this normally runs in a Celery task that exits when the - import is finished due to the Celery "--maxtasksperchild 1" parameter. - Even if more tasks ran in the same process, they too won't be handling - browser requests so the setting change won't affect such tasks. - - This Mozilla project ticket has a copy of some correspondence with New Relic: - https://bugzilla.mozilla.org/show_bug.cgi?id=1196043 - (I am unable to access the referenced New Relic ticket.) 
- """ - try: - from newrelic.hooks.framework_django import django_settings - - django_settings.browser_monitoring.auto_instrument = False - except ImportError: - pass +CLOUDFRONT_RESPONSE_TIMEOUT = 60 class Timer: - def __init__(self, cutoff): - self.cutoff = cutoff + """Context manager to time a block of code""" + + def __init__(self, threshold_seconds: int = None): + self.threshold_seconds = threshold_seconds def __enter__(self): - self.start = perf_counter() + self.start = time.perf_counter() return self def __exit__(self, type, value, traceback): - self.elapsed = perf_counter() - self.start - self.stop = self.elapsed < self.cutoff - self.readout = f"{self.elapsed} < {self.cutoff} = {self.stop}" - - -class CachePrimer: - def __init__(self, cutoff_secs=0, cutoff_count=None): - self.cutoff_secs = cutoff_secs - self.cutoff_count = cutoff_count - self.count = 0 - - def request(self, uri, payload=None): - c = Client() - if settings.ALLOWED_HOSTS and settings.ALLOWED_HOSTS[0] != "*": - host = settings.ALLOWED_HOSTS[0] - else: - host = "127.0.0.1" - logger.debug(f"Querying {uri}") - response = c.get(uri, data=payload, HTTP_HOST=host) - if response.status_code != 200: - logger.warning("Status not OK: {} ({})".format(uri, response.status_code)) - raise Exception("Request to %s failed: %s", uri, response.status_code) - - def get_endpoints(self): - for idx, row in enumerate(self.get_queryset()): - with Timer(self.cutoff_secs) as timer: - yield self.get_urls(row) - officer_id = row.get("officer_id", "") - logger.info( - ( - "Primed cache for agency %s:%s " - "[officer_id=%s] with " - "%s stops in %.2f secs (%s of %s)" - ), - row["agency_id"], - row["agency_name"], - officer_id, - "{:,}".format(row["num_stops"]), - timer.elapsed, - idx, - self.count, - ) - if timer.stop or (self.cutoff_count and idx == self.cutoff_count): - logger.info("Cutoff reached, stopping...") - break - - def prime(self): - logger.info(f"{self} starting") - self.count = len(self.get_queryset()) - 
logger.info(f"{self} priming {self.count:,} objects") - for endpoints in self.get_endpoints(): - for endpoint in endpoints: - self.request(endpoint) - - def __repr__(self): - options = [] - if self.cutoff_secs: - options.append(f"cutoff_secs={self.cutoff_secs}") - if self.cutoff_count: - options.append(f"cutoff_count={self.cutoff_count}") - return f"<{self.__class__.__name__} {' '.join(options)}>" - - -class AgencyStopsPrimer(CachePrimer): - def get_queryset(self): - qs = list( - Stop.objects.no_cache() - .annotate(agency_name=F("agency_description")) - .values("agency_name", "agency_id") - .annotate(num_stops=Count("stop_id")) - .order_by("-num_stops") + self.elapsed = time.perf_counter() - self.start + self.exceeded_threshold = ( + self.elapsed > self.threshold_seconds if self.threshold_seconds else False ) + + +def get_agencies_and_officers(by_officer: bool = False, limit_to_agencies: list = None) -> list: + """Return a list of agencies (and optionally officers) sorted by number of stops""" + limit_to_agencies = limit_to_agencies or [] + values = ["agency_id"] + if by_officer: + values.append("officer_id") + query = Q() + if limit_to_agencies: + query &= Q(agency_id__in=limit_to_agencies) + rows = list( + StopSummary.objects.filter(query) + .annotate(agency_name=F("agency__name")) + .values(*values) + .annotate(num_stops=Sum("count")) + .order_by("-num_stops") + .values_list(*values + ["num_stops"], named=True) + ) + if not by_officer and not limit_to_agencies: # Manually insert the statewide to force the caching since a # stop instance won't directly be associated with the statewide agency id. 
- qs.insert( + Row = rows[0].__class__ + rows.insert( 0, - { - "agency_name": "North Carolina State", - "agency_id": -1, - "num_stops": Stop.objects.count(), - }, + Row( + agency_id=-1, + num_stops=StopSummary.objects.aggregate(Sum("count"))["count__sum"], + ), ) - return qs - - def get_urls(self, row): - urls = [] - for endpoint_name in API_ENDPOINT_NAMES: - urls.append(reverse(endpoint_name, args=[row["agency_id"]])) - return urls - - -class OfficerStopsPrimer(CachePrimer): - def get_queryset(self): - return ( - StopSummary.objects.all() - .annotate(agency_name=F("agency__name")) - .values("agency_name", "agency_id", "officer_id") - .annotate(num_stops=Sum("count")) - .order_by("-num_stops") + logger.info( + f"Found {len(rows):,} agencies and officers " + f"({by_officer=}, {limit_to_agencies=}, {values=}, {query=})" + ) + return rows + + +def get_group_urls(agency_id: int, officer_id: int = None) -> list[str]: + """Return a list of endpoint URLs for an agency (and optionally an officer)""" + if settings.ALLOWED_HOSTS and settings.ALLOWED_HOSTS[0] != "*": + host = f"https://{settings.ALLOWED_HOSTS[0]}" + else: + host = "http://127.0.0.1:8000" + urls = [] + for endpoint_name in API_ENDPOINT_NAMES: + url = reverse(endpoint_name, args=[agency_id]) + if officer_id: + url += f"?officer={officer_id}" + urls.append(host + url) + return urls + + +@contextmanager +def client() -> Generator[httpx.Client, None, None]: + """Return a configured HTTPX client for cache priming""" + # Attempt to match Browser behavior + headers = { + "Accept": "application/json, text/plain, */*", + "Accept-Encoding": "gzip, deflate, br, zstd", + "Accept-Language": "en-US,en;q=0.9", + } + # Configure basic auth (for staging environment) + auth = None + if settings.CACHE_BASICAUTH_USERNAME and settings.CACHE_BASICAUTH_PASSWORD: + auth = httpx.BasicAuth( + username=settings.CACHE_BASICAUTH_USERNAME, password=settings.CACHE_BASICAUTH_PASSWORD + ) + with httpx.Client( + auth=auth, headers=headers, 
http2=True, timeout=CLOUDFRONT_RESPONSE_TIMEOUT + ) as client: + yield client + + +def prime_endpoint_cache(client: httpx.Client, url: str, headers: dict = None): + """Prime the cache for a single endpoint""" + logger.debug(f"Priming endpoint cache ({url=})...") + with Timer(threshold_seconds=CLOUDFRONT_RESPONSE_TIMEOUT - 1) as timer: + response = client.get(url, headers=headers) + logger.debug( + f"Queried {url=} ({response.headers=}, {response.request.headers=}, {timer.elapsed=})" + ) + if timer.exceeded_threshold: + raise Exception(f"Slow prime cache response possibly not cached {url} ({timer.elapsed})") + if response.status_code != 200: + raise Exception(f"Request to {url} failed: {response.status_code}") + + +def prime_group_cache(agency_id: int, num_stops: int, officer_id: int = None): + """Prime the cache for an agency (and optionally officer)""" + logger.debug(f"Priming group cache ({agency_id=}, {officer_id=}, {num_stops=})...") + with client() as c: + logger.info( + f"Priming cache ({agency_id=}, {officer_id=}, {num_stops=}, {bool(c.auth)=})..." + ) + urls = get_group_urls(agency_id=agency_id, officer_id=officer_id) + with Timer() as group_timer: + for url in urls: + # Request with brotli encoding + prime_endpoint_cache( + client=c, + url=url, + # headers={"Accept-Encoding": "gzip, deflate, br, zstd"}, + ) + # Add a URL with a trailing ? 
to ensure the cache is primed + # since React sometimes appends it to the URL + prime_endpoint_cache(client=c, url=url + "?") + # Request with alternative gzip encoding + prime_endpoint_cache( + client=c, + url=url, + headers={"Accept-Encoding": "gzip, deflate, zstd"}, + ) + + logger.info( + f"Primed cache ({agency_id=}, {officer_id=}, {num_stops=}, {group_timer.elapsed=})" ) - def get_urls(self, row): - urls = [] - for endpoint_name in API_ENDPOINT_NAMES: - agency_url = reverse(endpoint_name, args=[row["agency_id"]]) - urls.append(f"{agency_url}?officer={row['officer_id']}") - return urls - - -def run( - cutoff_duration_secs=None, - clear_cache=False, - skip_agencies=False, - skip_officers=True, - officer_cutoff_count=None, -): - """ - Prime query cache for "big" NC agencies. - - Order the agencies by number of stops, and keep making the web requests - that use the queries until the queries for an agency take less than - cutoff_duration_secs. - - This is expected to be used as part of the following flow: - 1. reload new NC data - 2. flush memcached - 3. prime the cache to load the new data into the query cache - - If memcached isn't flushed before priming the cache, this function will - presumably exit prematurely without loading the new data. - - This uses the Django test client to avoid encountering Gunicorn timeouts, - so it can't be used remotely. - :param cutoff_duration_secs: Once priming the cache for an agency takes - less than this, stop. +def invalidate_cloudfront_cache(sleep_seconds: int = 30) -> dict: """ - if cutoff_duration_secs is None: - cutoff_duration_secs = DEFAULT_CUTOFF_SECS - - avoid_newrelic_bug() + Invalidate the CloudFront cache before priming the cache. 
- if clear_cache: - logger.info("Clearing cache") - cache.clear() - - if not skip_agencies: - AgencyStopsPrimer(cutoff_secs=cutoff_duration_secs).prime() - - if not skip_officers: - OfficerStopsPrimer( - cutoff_secs=0, cutoff_count=officer_cutoff_count - ).prime() # cache all officer endpoints for now - - logger.info("Complete") + https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudfront/client/create_invalidation.html + """ + if settings.CACHE_CLOUDFRONT_DISTRIBUTION_ID: + logger.info( + f"Invalidating CloudFront distribution ({settings.CACHE_CLOUDFRONT_DISTRIBUTION_ID=})" + ) + cf = boto3.client("cloudfront") + # Create CloudFront invalidation + resp_invalidation: dict = cf.create_invalidation( + DistributionId=settings.CACHE_CLOUDFRONT_DISTRIBUTION_ID, + InvalidationBatch={ + "Paths": {"Quantity": 1, "Items": ["/*"]}, + "CallerReference": str(time.time()).replace(".", ""), + }, + ) + invalidation_id: str = resp_invalidation["Invalidation"]["Id"] + # Wait for invalidation to complete + invalidation_in_progress = True + while invalidation_in_progress: + response = cf.get_invalidation( + DistributionId=settings.CACHE_CLOUDFRONT_DISTRIBUTION_ID, Id=invalidation_id + ) + status = response["Invalidation"]["Status"] + if status == "Completed": + # Stop waiting, invalidation is complete + invalidation_in_progress = False + logger.info(f"Invalidation complete ({status=})") + elif status == "InProgress": + # Wait before checking again + logger.debug( + f"Invalidation pending ({status=}), sleeping for {sleep_seconds} seconds..." 
+ ) + time.sleep(sleep_seconds) + else: + raise Exception(f"Invalidation failed: {status})") diff --git a/nc/tasks.py b/nc/tasks.py index eda1871b..727688f2 100755 --- a/nc/tasks.py +++ b/nc/tasks.py @@ -7,6 +7,7 @@ from django.utils.timezone import now from celery.utils.log import get_task_logger +from nc import prime_cache from nc.data.importer import MAGIC_NC_FTP_URL from traffic_stops.celery import app from tsdata.models import Dataset @@ -44,3 +45,52 @@ def download_and_import_nc_dataset(): nc_dataset.report_email_2 = settings.NC_AUTO_IMPORT_MONITORS[1] nc_dataset.save() import_dataset.delay(nc_dataset.pk) + + +@app.task(autoretry_for=(Exception,), retry_backoff=True, retry_kwargs={"max_retries": 5}) +def prime_group_cache(agency_id: int, num_stops: int, officer_id: int = None): + """Prime the cache for a single agency (and optionally officer)""" + prime_cache.prime_group_cache(agency_id=agency_id, num_stops=num_stops, officer_id=officer_id) + # Run the task again to ensure the cache is primed + prime_cache.prime_group_cache(agency_id=agency_id, num_stops=num_stops, officer_id=officer_id) + return (agency_id, officer_id, num_stops) + + +def prime_groups_cache( + by_officer: bool = False, cutoff_count: int = 0, limit_to_agencies: list[int] = None +): + kind = "officer" if by_officer else "agency" + logger.info(f"Querying {kind} endpoint groups ({by_officer=}, {cutoff_count=})") + # Get the agencies (and officers) sorted by number of stops + endpoint_groups = prime_cache.get_agencies_and_officers( + by_officer=by_officer, limit_to_agencies=limit_to_agencies + ) + logger.info(f"Queuing {len(endpoint_groups):,} {kind} endpoint groups") + for endpoint_group in endpoint_groups: + if endpoint_group.num_stops <= cutoff_count: + logger.info(f"Stopping due to cutoff ({endpoint_group.num_stops=}, {cutoff_count=})") + break + prime_group_cache.delay(**endpoint_group._asdict()) + + +@app.task +def prime_all_endpoints( + clear_cache: bool = False, + skip_agencies: bool = 
False, + skip_officers: bool = True, + agency_cutoff_count: int = 0, + limit_to_agencies: list[int] = None, +): + """Prime all API endpoint caches""" + if clear_cache: + prime_cache.invalidate_cloudfront_cache() + + if not skip_agencies: + prime_groups_cache( + by_officer=False, cutoff_count=agency_cutoff_count, limit_to_agencies=limit_to_agencies + ) + + if not skip_officers: + prime_groups_cache(by_officer=True, limit_to_agencies=limit_to_agencies) + + logger.info("Complete") diff --git a/nc/tests/api/test_state_facts.py b/nc/tests/api/test_state_facts.py index dafe1c91..8faa699c 100755 --- a/nc/tests/api/test_state_facts.py +++ b/nc/tests/api/test_state_facts.py @@ -1,11 +1,12 @@ -import factory import pytest from django.conf import settings +from faker import Faker from tsdata.models import StateFacts pytestmark = pytest.mark.django_db +fake = Faker() @pytest.mark.parametrize( @@ -21,10 +22,10 @@ ) def test_state_facts(client, facts_url, attr): facts = StateFacts.objects.get(state_key=settings.NC_KEY) # always exists - facts.total_stops = factory.Faker("random_number", digits=9).generate() - facts.total_stops_millions = factory.Faker("random_number", digits=2).generate() - facts.total_searches = factory.Faker("random_number", digits=5).generate() - facts.total_agencies = factory.Faker("random_number", digits=3).generate() + facts.total_stops = fake.random_number(digits=9) + facts.total_stops_millions = fake.random_number(digits=2) + facts.total_searches = fake.random_number(digits=5) + facts.total_agencies = fake.random_number(digits=3) facts.start_date = "Jan 1, 2000" facts.end_date = "Jan 1, 2020" facts.save() diff --git a/nc/tests/test_prime_cache.py b/nc/tests/test_prime_cache.py index 10d11622..24297bb7 100755 --- a/nc/tests/test_prime_cache.py +++ b/nc/tests/test_prime_cache.py @@ -1,23 +1,31 @@ -from django.test import TestCase +import pytest -from nc.prime_cache import run -from nc.tests import factories +from nc import prime_cache -class 
PrimeCacheTests(TestCase): - """ - This merely gives the cache priming code a chance to blow up if silly - changes are made. No results are verified. - """ +@pytest.fixture(autouse=True) +def group_urls(): + """Monkeypatch API_ENDPOINT_NAMES to return a single group URL for tests""" + prime_cache.API_ENDPOINT_NAMES = ("nc:arrests-percentage-of-stops",) - databases = "__all__" - def test_prime_cache(self): - factories.AgencyFactory(id=-1) # Statewide data +class TestGetGroupUrls: + def test_get_group_urls_empty_allowed_hosts(self, settings): + settings.ALLOWED_HOSTS = [] + assert ( + prime_cache.get_group_urls(agency_id=99)[0] + == "http://127.0.0.1:8000/api/agency/99/arrests-percentage-of-stops/" + ) - factories.ContrabandFactory() - factories.ContrabandFactory() - factories.ContrabandFactory() - factories.ContrabandFactory() - factories.ContrabandFactory() - run() + def test_get_group_urls_allowed_hosts(self, settings): + settings.ALLOWED_HOSTS = ["nccopwatch.org"] + assert ( + prime_cache.get_group_urls(agency_id=99)[0] + == "https://nccopwatch.org/api/agency/99/arrests-percentage-of-stops/" + ) + + def test_get_group_urls_officer_id(self): + assert ( + prime_cache.get_group_urls(agency_id=99, officer_id=100)[0] + == "https://testserver/api/agency/99/arrests-percentage-of-stops/?officer=100" + ) diff --git a/nc/views/arrests.py b/nc/views/arrests.py index 271736a8..c44a600d 100644 --- a/nc/views/arrests.py +++ b/nc/views/arrests.py @@ -1,19 +1,14 @@ import django_filters import pandas as pd -from django.conf import settings from django.db.models import Count, Q, Sum from django.db.models.functions import ExtractYear -from django.utils.decorators import method_decorator -from django.views.decorators.cache import cache_page from rest_framework.response import Response from rest_framework.views import APIView from nc.constants import CONTRABAND_TYPE_COLS, DEFAULT_RENAME_COLUMNS, STATEWIDE from nc.models import ContrabandSummary, StopPurpose, StopPurposeGroup, 
StopSummary -CACHE_TIMEOUT = settings.CACHE_COUNT_TIMEOUT - def create_table_data_response(qs, pivot_columns=None, value_key=None, rename_columns=None): rename_cols = rename_columns if rename_columns else DEFAULT_RENAME_COLUMNS @@ -196,7 +191,6 @@ def sort_by_stop_purpose_group(df): class AgencyArrestsPercentageOfStopsView(APIView): """Traffic Stops Leading to Arrest by Percentage""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Build chart data chart_df = arrest_query(request, agency_id, group_by=("driver_race_comb",)) @@ -211,7 +205,6 @@ def get(self, request, agency_id): class AgencyArrestsPercentageOfSearchesView(APIView): """Searches Leading to Arrest by Percentage""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Build chart data chart_df = arrest_query(request, agency_id, group_by=("driver_race_comb",)) @@ -226,7 +219,6 @@ def get(self, request, agency_id): class AgencyCountOfStopsAndArrests(APIView): """Traffic Stops Leading to Arrest by Count""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Build chart data chart_df = arrest_query(request, agency_id, group_by=("driver_race_comb",)).sort_values( @@ -245,7 +237,6 @@ def get(self, request, agency_id): class AgencyArrestsPercentageOfStopsByGroupPurposeView(APIView): """Percentage of Stops Leading to Arrest by Stop Purpose Group""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Conditionally build table data if request.query_params.get("modal"): @@ -269,7 +260,6 @@ def get(self, request, agency_id): class AgencyArrestsPercentageOfStopsPerStopPurposeView(APIView): """Percentage of Stops Leading to Arrest by Stop Purpose Type""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Conditionally build table data if request.query_params.get("modal"): @@ -292,7 +282,6 @@ def get(self, request, agency_id): class 
AgencyArrestsPercentageOfSearchesByGroupPurposeView(APIView): """Percentage of Searches Leading to Arrest by Stop Purpose Group""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Conditionally build table data if request.query_params.get("modal"): @@ -313,7 +302,6 @@ def get(self, request, agency_id): class AgencyArrestsPercentageOfSearchesPerStopPurposeView(APIView): """Percentage of Searches Leading to Arrest by Stop Purpose Type""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): # Conditionally build table data if request.query_params.get("modal"): @@ -339,7 +327,6 @@ def get(self, request, agency_id): class AgencyArrestsPercentageOfStopsPerContrabandTypeView(APIView): """Percentage of Stops Leading to Arrest by Discovered Contraband Type""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): chart_df = contraband_query(request, agency_id, group_by=("contraband_type",)) chart_data = chart_df["driver_contraband_arrest_rate"].to_list() @@ -358,7 +345,6 @@ def get(self, request, agency_id): class AgencyStopsYearRange(APIView): """Returns list of years with data for agency/officer""" - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): filter_set = ArrestSummaryFilterSet(request.GET, agency_id=agency_id) year_range = filter_set.qs.order_by("-year").values_list("year", flat=True).distinct("year") diff --git a/nc/views/main.py b/nc/views/main.py index 21c602b2..e7bf96d8 100644 --- a/nc/views/main.py +++ b/nc/views/main.py @@ -13,13 +13,12 @@ from django.db.models import Count, Q, Sum from django.db.models.functions import ExtractYear from django.utils.decorators import method_decorator -from django.views.decorators.cache import cache_page, never_cache +from django.views.decorators.cache import never_cache from django_filters.rest_framework import DjangoFilterBackend from rest_framework import viewsets from rest_framework.decorators import 
action from rest_framework.response import Response from rest_framework.views import APIView -from rest_framework_extensions.cache.decorators import cache_response from rest_framework_extensions.key_constructor import bits from rest_framework_extensions.key_constructor.constructors import DefaultObjectKeyConstructor @@ -90,9 +89,6 @@ class QueryKeyConstructor(DefaultObjectKeyConstructor): query_cache_key_func = QueryKeyConstructor() -CACHE_TIMEOUT = settings.CACHE_COUNT_TIMEOUT - - def get_date_range(request): # Only filter is from and to values are found and are valid date_precision = "year" @@ -202,14 +198,12 @@ def query(self, results, group_by, filter_=None): results.add(**data) @action(detail=True, methods=["get"]) - @cache_response(key_func=query_cache_key_func) def stops(self, request, pk=None): results = GroupedData(by="year", defaults=GROUP_DEFAULTS) self.query(results, group_by=("year", "driver_race", "driver_ethnicity")) return Response(results.flatten()) @action(detail=True, methods=["get"]) - @cache_response(key_func=query_cache_key_func) def stops_by_reason(self, request, pk=None): response = {} # stops @@ -227,7 +221,6 @@ def stops_by_reason(self, request, pk=None): return Response(response) @action(detail=True, methods=["get"]) - @cache_response(key_func=query_cache_key_func) def use_of_force(self, request, pk=None): results = GroupedData(by="year", defaults=GROUP_DEFAULTS) q = Q(search_type__isnull=False) & Q(engage_force="t") @@ -235,7 +228,6 @@ def use_of_force(self, request, pk=None): return Response(results.flatten()) @action(detail=True, methods=["get"]) - @cache_response(key_func=query_cache_key_func) def searches(self, request, pk=None): results = GroupedData(by="year", defaults=GROUP_DEFAULTS) q = Q(search_type__isnull=False) @@ -243,7 +235,6 @@ def searches(self, request, pk=None): return Response(results.flatten()) @action(detail=True, methods=["get"]) - @cache_response(key_func=query_cache_key_func) def searches_by_type(self, request, 
pk=None): results = GroupedData(by=("search_type", "year"), defaults=GROUP_DEFAULTS) q = Q(search_type__isnull=False) @@ -381,7 +372,6 @@ def get_values(race): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): stop_qs = StopSummary.objects.all().annotate(year=ExtractYear("date")) @@ -479,7 +469,6 @@ def get_values(race): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): date_precision, date_range = get_date_range(request) @@ -525,7 +514,6 @@ def get_values(self, df, stop_purpose, years_len): else: return [0] * years_len - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): date_precision, date_range = get_date_range(request) qs = StopSummary.objects.all() @@ -635,7 +623,6 @@ def get_values(col): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): date_precision, date_range = get_date_range(request) qs = StopSummary.objects.all() @@ -713,7 +700,6 @@ def get(self, request, agency_id): class AgencyContrabandView(APIView): - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): year = request.GET.get("year", None) @@ -776,7 +762,6 @@ def get(self, request, agency_id): class AgencyContrabandTypesView(APIView): - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): year = request.GET.get("year", None) @@ -886,7 +871,6 @@ def get_values(col): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): year = request.GET.get("year", None) @@ -1038,7 +1022,6 @@ def create_dataset(self, contraband_df, searches_df, stop_purpose): data.append(group) return data - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): year = request.GET.get("year", None) @@ -1107,7 +1090,6 @@ def get(self, request, agency_id): class AgencyContrabandStopGroupByPurposeModalView(APIView): - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, 
agency_id): grouped_stop_purpose = request.GET.get("grouped_stop_purpose") contraband_type = request.GET.get("contraband_type") @@ -1195,7 +1177,6 @@ def get_values(race): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): stop_qs = StopSummary.objects.all().annotate(year=ExtractYear("date")) @@ -1308,7 +1289,6 @@ def get_values(race): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): date_precision, date_range = get_date_range(request) @@ -1396,7 +1376,6 @@ def get_values(race): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): stop_qs = StopSummary.objects.all().annotate(year=ExtractYear("date")) search_qs = StopSummary.objects.filter(search_type__isnull=False).annotate( @@ -1524,7 +1503,6 @@ def get_values(race): ], } - @method_decorator(cache_page(CACHE_TIMEOUT)) def get(self, request, agency_id): qs = StopSummary.objects.filter(search_type__isnull=False, engage_force="t").annotate( year=ExtractYear("date") diff --git a/pytest.ini b/pytest.ini index 58e4bdd1..84c81b08 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] testpaths = nc tsdata python_files = tests.py test_*.py *_tests.py -addopts = --ds=traffic_stops.settings.test -p no:warnings --cov-config=.coveragerc --cov-fail-under=60 --cov=nc --cov=tsdata --cov-report=html --cov-report=term-missing:skip-covered -vvv +addopts = --ds=traffic_stops.settings.test -p no:warnings --cov-config=.coveragerc --cov-fail-under=44 --cov=nc --cov=tsdata --cov-report=html --cov-report=term-missing:skip-covered -vvv diff --git a/requirements/base/base.in b/requirements/base/base.in index ec0b326d..f9bfb6b1 100755 --- a/requirements/base/base.in +++ b/requirements/base/base.in @@ -8,7 +8,7 @@ boto boto3==1.34.100 botocore==1.34.100 click==8.1.7 -django-cache-machine==1.2.0 +# django-cache-machine is no longer used, remains for legacy migrations django-ckeditor==6.7.0 django-click==2.3.0 
django-crispy-forms @@ -22,7 +22,9 @@ django-storages==1.13.2 djangorestframework==3.12.4 dj-database-url drf-extensions==0.7.1 -psycopg2<2.9 +psycopg2==2.9.9 +brotli==1.1.0 +httpx[http2] requests==2.32.3 urllib3==2.2.1 six diff --git a/requirements/base/base.txt b/requirements/base/base.txt index 113a2cdb..d835e4af 100755 --- a/requirements/base/base.txt +++ b/requirements/base/base.txt @@ -6,6 +6,8 @@ # amqp==5.2.0 # via kombu +anyio==4.6.2.post1 + # via httpx asgiref==3.5.2 # via django billiard==4.2.0 @@ -19,12 +21,17 @@ botocore==1.34.100 # -r requirements/base/base.in # boto3 # s3transfer +brotli==1.1.0 + # via -r requirements/base/base.in celery==5.4.0 # via -r requirements/base/base.in census==0.8.22 # via -r requirements/base/base.in certifi==2020.6.20 - # via requests + # via + # httpcore + # httpx + # requests charset-normalizer==3.0.1 # via requests click==8.1.7 @@ -55,8 +62,6 @@ django==3.2.25 # django-redis # django-storages # djangorestframework -django-cache-machine==1.2.0 - # via -r requirements/base/base.in django-ckeditor==6.7.0 # via -r requirements/base/base.in django-click==2.3.0 @@ -85,8 +90,25 @@ djangorestframework==3.12.4 # drf-extensions drf-extensions==0.7.1 # via -r requirements/base/base.in +exceptiongroup==1.2.2 + # via anyio +h11==0.14.0 + # via httpcore +h2==4.1.0 + # via httpx +hpack==4.0.0 + # via h2 +httpcore==1.0.6 + # via httpx +httpx[http2]==0.27.2 + # via -r requirements/base/base.in +hyperframe==6.0.1 + # via h2 idna==2.10 - # via requests + # via + # anyio + # httpx + # requests jellyfish==0.6.1 # via us jmespath==1.0.1 @@ -101,7 +123,7 @@ pandas==2.2.2 # via -r requirements/base/base.in prompt-toolkit==3.0.47 # via click-repl -psycopg2==2.8.6 +psycopg2==2.9.9 # via -r requirements/base/base.in python-dateutil==2.9.0.post0 # via @@ -125,8 +147,14 @@ six==1.15.0 # -r requirements/base/base.in # django-extensions # python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx sqlparse==0.3.1 # via django 
+typing-extensions==4.12.2 + # via anyio tzdata==2024.1 # via # celery diff --git a/requirements/deploy/deploy.in b/requirements/deploy/deploy.in index 62e440c6..e51b79cd 100644 --- a/requirements/deploy/deploy.in +++ b/requirements/deploy/deploy.in @@ -1,6 +1,6 @@ # Deploy.in -c ../base/base.txt python3-memcached -newrelic==9.10.0 -sentry-sdk==2.5.1 -uwsgi==2.0.26 +newrelic +sentry-sdk +uwsgi diff --git a/requirements/deploy/deploy.txt b/requirements/deploy/deploy.txt index b2cec8a1..3493d03b 100644 --- a/requirements/deploy/deploy.txt +++ b/requirements/deploy/deploy.txt @@ -8,15 +8,15 @@ certifi==2020.6.20 # via # -c requirements/deploy/../base/base.txt # sentry-sdk -newrelic==9.10.0 +newrelic==10.2.0 # via -r requirements/deploy/deploy.in python3-memcached==1.51 # via -r requirements/deploy/deploy.in -sentry-sdk==2.5.1 +sentry-sdk==2.17.0 # via -r requirements/deploy/deploy.in urllib3==2.2.1 # via # -c requirements/deploy/../base/base.txt # sentry-sdk -uwsgi==2.0.26 +uwsgi==2.0.28 # via -r requirements/deploy/deploy.in diff --git a/requirements/dev/dev.in b/requirements/dev/dev.in index d54a057b..9cd3436c 100644 --- a/requirements/dev/dev.in +++ b/requirements/dev/dev.in @@ -5,16 +5,16 @@ wheel # deploy -invoke-kubesae==0.1.0 -ansible==9.5.1 -cryptography==42.0.8 -cffi==1.16.0 -Jinja2==3.1.4 -openshift==0.13.2 -kubernetes==12.0.0 +invoke-kubesae +ansible +cryptography +cffi +Jinja2 +openshift +kubernetes kubernetes-validate~=1.29.1 -referencing==0.35.1 -jsonschema==4.22 +referencing +jsonschema troposphere diff --git a/requirements/dev/dev.txt b/requirements/dev/dev.txt index 99e784e2..85368399 100755 --- a/requirements/dev/dev.txt +++ b/requirements/dev/dev.txt @@ -4,32 +4,37 @@ # # pip-compile --output-file=requirements/dev/dev.txt requirements/dev/dev.in # -alabaster==0.7.12 +alabaster==0.7.16 # via sphinx -ansible==9.5.1 +annotated-types==0.7.0 + # via pydantic +ansible==10.5.0 # via # -r requirements/dev/dev.in # invoke-kubesae -ansible-core==2.16.7 
+ansible-core==2.17.5 # via ansible -appnope==0.1.0 +anyio==4.6.2.post1 + # via + # -c requirements/dev/../base/base.txt + # starlette + # watchfiles +appnope==0.1.4 # via -r requirements/dev/dev.in -argh==0.26.2 - # via sphinx-autobuild asgiref==3.5.2 # via # -c requirements/dev/../base/base.txt # django -attrs==23.2.0 +asttokens==2.4.1 + # via stack-data +attrs==24.2.0 # via # jsonschema # referencing awscli==1.32.100 # via -r requirements/dev/dev.in -babel==2.8.0 +babel==2.16.0 # via sphinx -backcall==0.1.0 - # via ipython boto3==1.34.100 # via # -c requirements/dev/../base/base.txt @@ -40,18 +45,18 @@ botocore==1.34.100 # awscli # boto3 # s3transfer -cachetools==4.1.0 +cachetools==5.5.0 # via google-auth certifi==2020.6.20 # via # -c requirements/dev/../base/base.txt # kubernetes # requests -cffi==1.16.0 +cffi==1.17.1 # via # -r requirements/dev/dev.in # cryptography -cfn-flip==1.2.3 +cfn-flip==1.3.0 # via troposphere charset-normalizer==3.0.1 # via @@ -62,49 +67,65 @@ click==8.1.7 # -c requirements/dev/../base/base.txt # -c requirements/dev/../test/test.txt # cfn-flip -colorama==0.4.3 + # typer + # uvicorn +colorama==0.4.6 # via # awscli # invoke-kubesae -cryptography==42.0.8 + # sphinx-autobuild +cryptography==43.0.3 # via # -r requirements/dev/dev.in # ansible-core -decorator==4.4.2 - # via - # ipython - # traitlets +decorator==5.1.1 + # via ipython django==3.2.25 # via # -c requirements/dev/../base/base.txt # django-debug-toolbar -django-debug-toolbar==2.2 +django-debug-toolbar==4.3.0 # via -r requirements/dev/dev.in -docutils==0.15.2 +docutils==0.16 # via # awscli - # rstcheck + # rstcheck-core # sphinx -google-auth==1.14.3 +durationpy==0.9 + # via kubernetes +exceptiongroup==1.2.2 + # via + # -c requirements/dev/../base/base.txt + # -c requirements/dev/../test/test.txt + # anyio + # ipython +executing==2.1.0 + # via stack-data +google-auth==2.35.0 # via kubernetes +h11==0.14.0 + # via + # -c requirements/dev/../base/base.txt + # uvicorn idna==2.10 # via # 
-c requirements/dev/../base/base.txt + # anyio # requests -imagesize==1.2.0 +imagesize==1.4.1 # via sphinx -importlib-resources==6.4.0 +importlib-resources==6.4.5 # via kubernetes-validate -invoke==1.4.1 +invoke==2.2.0 # via invoke-kubesae invoke-kubesae==0.1.0 # via -r requirements/dev/dev.in -ipython==7.14.0 +ipython==8.29.0 # via -r requirements/dev/dev.in -ipython-genutils==0.2.0 - # via traitlets -jedi==0.17.0 - # via ipython +jedi==0.19.1 + # via + # ipython + # pudb jinja2==3.1.4 # via # -r requirements/dev/dev.in @@ -115,24 +136,30 @@ jmespath==1.0.1 # -c requirements/dev/../base/base.txt # boto3 # botocore -jsonschema==4.22.0 +jsonschema==4.23.0 # via # -r requirements/dev/dev.in # kubernetes-validate -jsonschema-specifications==2023.12.1 +jsonschema-specifications==2024.10.1 # via jsonschema -kubernetes==12.0.0 +kubernetes==31.0.0 # via # -r requirements/dev/dev.in # openshift kubernetes-validate==1.29.1 # via -r requirements/dev/dev.in -livereload==2.6.2 - # via sphinx-autobuild -markupsafe==2.1.1 +markdown-it-py==3.0.0 + # via rich +markupsafe==3.0.2 # via jinja2 -oauthlib==3.1.0 - # via requests-oauthlib +matplotlib-inline==0.1.7 + # via ipython +mdurl==0.1.2 + # via markdown-it-py +oauthlib==3.2.2 + # via + # kubernetes + # requests-oauthlib openshift==0.13.2 # via -r requirements/dev/dev.in packaging==24.1 @@ -140,41 +167,39 @@ packaging==24.1 # -c requirements/dev/../test/test.txt # ansible-core # kubernetes-validate + # pudb # sphinx -parso==0.7.0 +parso==0.8.4 # via jedi -pathtools==0.1.2 - # via - # sphinx-autobuild - # watchdog -pexpect==4.8.0 - # via ipython -pickleshare==0.7.5 +pexpect==4.9.0 # via ipython -pkgutil-resolve-name==1.3.10 - # via jsonschema -port-for==0.3.1 - # via sphinx-autobuild prompt-toolkit==3.0.47 # via # -c requirements/dev/../base/base.txt # ipython -ptyprocess==0.6.0 +ptyprocess==0.7.0 # via pexpect -pudb==2019.2 +pudb==2024.1.3 # via -r requirements/dev/dev.in -pyasn1==0.4.8 +pure-eval==0.2.3 + # via stack-data 
+pyasn1==0.6.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 +pyasn1-modules==0.4.1 # via google-auth -pycparser==2.20 +pycparser==2.22 # via cffi -pygments==2.6.1 +pydantic==2.9.2 + # via rstcheck-core +pydantic-core==2.23.4 + # via pydantic +pygments==2.18.0 # via # ipython # pudb + # rich # sphinx python-dateutil==2.9.0.post0 # via @@ -187,9 +212,8 @@ python-string-utils==1.0.0 pytz==2022.1 # via # -c requirements/dev/../base/base.txt - # babel # django -pyyaml==5.3.1 +pyyaml==6.0.2 # via # -c requirements/dev/../test/test.txt # ansible-core @@ -197,7 +221,6 @@ pyyaml==5.3.1 # cfn-flip # kubernetes # kubernetes-validate - # sphinx-autobuild referencing==0.35.1 # via # -r requirements/dev/dev.in @@ -210,90 +233,118 @@ requests==2.32.3 # kubernetes # requests-oauthlib # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==2.0.0 # via kubernetes -resolvelib==0.5.4 +resolvelib==1.0.1 # via ansible-core -rpds-py==0.19.0 +rich==13.9.3 + # via typer +rpds-py==0.20.0 # via # jsonschema # referencing -rsa==3.4.2 +rsa==4.7.2 # via # awscli # google-auth -rstcheck==3.3.1 +rstcheck==6.2.4 # via -r requirements/dev/dev.in +rstcheck-core==1.2.1 + # via rstcheck s3transfer==0.10.1 # via # -c requirements/dev/../base/base.txt # awscli # boto3 +shellingham==1.5.4 + # via typer six==1.15.0 # via # -c requirements/dev/../base/base.txt # -c requirements/dev/../test/test.txt + # asttokens # cfn-flip - # google-auth # kubernetes - # livereload # openshift # python-dateutil - # traitlets - # websocket-client -snowballstemmer==2.0.0 +sniffio==1.3.1 + # via + # -c requirements/dev/../base/base.txt + # anyio +snowballstemmer==2.2.0 # via sphinx -sphinx==3.1.1 - # via -r requirements/dev/dev.in -sphinx-autobuild==0.7.1 +sphinx==5.3.0 + # via + # -r requirements/dev/dev.in + # sphinx-autobuild +sphinx-autobuild==2024.10.3 # via -r requirements/dev/dev.in -sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 
+sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==1.0.3 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.4 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx sqlparse==0.3.1 # via # -c requirements/dev/../base/base.txt # django # django-debug-toolbar -tornado==6.0.4 - # via - # livereload - # sphinx-autobuild -traitlets==4.3.3 +stack-data==0.6.3 # via ipython -troposphere==3.1.1 +starlette==0.41.2 + # via sphinx-autobuild +traitlets==5.14.3 + # via + # ipython + # matplotlib-inline +troposphere==4.8.3 # via -r requirements/dev/dev.in +typer==0.12.5 + # via rstcheck typing-extensions==4.12.2 # via + # -c requirements/dev/../base/base.txt # -c requirements/dev/../test/test.txt + # anyio + # ipython # kubernetes-validate + # pydantic + # pydantic-core + # rich + # typer + # urwid + # uvicorn urllib3==2.2.1 # via # -c requirements/dev/../base/base.txt # botocore # kubernetes # requests -urwid==2.1.0 +urwid==2.6.16 + # via + # pudb + # urwid-readline +urwid-readline==0.15.1 # via pudb -watchdog==0.10.3 +uvicorn==0.32.0 + # via sphinx-autobuild +watchfiles==0.24.0 # via sphinx-autobuild wcwidth==0.2.13 # via # -c requirements/dev/../base/base.txt # prompt-toolkit -websocket-client==0.57.0 + # urwid +websocket-client==1.8.0 # via kubernetes -wheel==0.37.1 +websockets==13.1 + # via sphinx-autobuild +wheel==0.44.0 # via -r requirements/dev/dev.in - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/test/test.in b/requirements/test/test.in index 9e3fa21d..7dac4448 100755 --- a/requirements/test/test.in +++ b/requirements/test/test.in @@ -3,13 +3,12 @@ pyyaml isort -pytest==8.2.2 -pytest-cov==5.0.0 +pytest +pytest-cov +pytest-django factory_boy -pytest-django==4.8.0 coverage # Linting flake8 -black==24.4.2 -pre-commit==3.5.0 -identify==2.5.36 +black 
+pre-commit diff --git a/requirements/test/test.txt b/requirements/test/test.txt index 34c9ae9b..b5a0fce1 100755 --- a/requirements/test/test.txt +++ b/requirements/test/test.txt @@ -4,76 +4,76 @@ # # pip-compile --output-file=requirements/test/test.txt requirements/test/test.in # -black==24.4.2 +black==24.10.0 # via -r requirements/test/test.in -cfgv==3.1.0 +cfgv==3.4.0 # via pre-commit click==8.1.7 # via # -c requirements/test/../base/base.txt # black -coverage[toml]==7.5.3 +coverage[toml]==7.6.4 # via # -r requirements/test/test.in # pytest-cov -distlib==0.3.6 +distlib==0.3.9 # via virtualenv -exceptiongroup==1.2.1 - # via pytest -factory-boy==2.12.0 +exceptiongroup==1.2.2 + # via + # -c requirements/test/../base/base.txt + # pytest +factory-boy==3.3.1 # via -r requirements/test/test.in -faker==4.1.0 +faker==30.8.1 # via factory-boy -filelock==3.9.0 +filelock==3.16.1 # via virtualenv -flake8==3.8.3 +flake8==7.1.1 # via -r requirements/test/test.in -identify==2.5.36 - # via - # -r requirements/test/test.in - # pre-commit +identify==2.6.1 + # via pre-commit iniconfig==2.0.0 # via pytest -isort==4.3.21 +isort==5.13.2 # via -r requirements/test/test.in -mccabe==0.6.1 +mccabe==0.7.0 # via flake8 -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via black -nodeenv==1.3.5 +nodeenv==1.9.1 # via pre-commit packaging==24.1 # via # black # pytest -pathspec==0.9.0 +pathspec==0.12.1 # via black -platformdirs==2.4.0 +platformdirs==4.3.6 # via # black # virtualenv pluggy==1.5.0 # via pytest -pre-commit==3.5.0 +pre-commit==4.0.1 # via -r requirements/test/test.in -pycodestyle==2.6.0 +pycodestyle==2.12.1 # via flake8 -pyflakes==2.2.0 +pyflakes==3.2.0 # via flake8 -pytest==8.2.2 +pytest==8.3.3 # via # -r requirements/test/test.in # pytest-cov # pytest-django pytest-cov==5.0.0 # via -r requirements/test/test.in -pytest-django==4.8.0 +pytest-django==4.9.0 # via -r requirements/test/test.in python-dateutil==2.9.0.post0 # via # -c requirements/test/../base/base.txt # faker 
-pyyaml==5.3.1 +pyyaml==6.0.2 # via # -r requirements/test/test.in # pre-commit @@ -81,14 +81,15 @@ six==1.15.0 # via # -c requirements/test/../base/base.txt # python-dateutil -text-unidecode==1.3 - # via faker -tomli==2.0.1 +tomli==2.0.2 # via # black # coverage # pytest typing-extensions==4.12.2 - # via black -virtualenv==20.17.1 + # via + # -c requirements/test/../base/base.txt + # black + # faker +virtualenv==20.27.0 # via pre-commit diff --git a/traffic_stops/celery.py b/traffic_stops/celery.py index a4492527..c7e0e0fc 100644 --- a/traffic_stops/celery.py +++ b/traffic_stops/celery.py @@ -1,15 +1,9 @@ -from __future__ import absolute_import - import os from django.conf import settings # noqa from celery import Celery -from . import load_env - -load_env.load_env() - # set the default Django settings module for the 'celery' program. os.environ.setdefault("DJANGO_SETTINGS_MODULE", "traffic_stops.settings") diff --git a/traffic_stops/settings/base.py b/traffic_stops/settings/base.py index 0463d6f0..cd506c8c 100755 --- a/traffic_stops/settings/base.py +++ b/traffic_stops/settings/base.py @@ -251,16 +251,6 @@ def __init__(self, tz_name=None): "level": "DEBUG", "propagate": False, }, - "caching": { - "handlers": ["console"], - "level": "DEBUG", - "propagate": False, - }, - "caching.invalidation": { - "handlers": ["console"], - "level": "INFO", - "propagate": False, - }, "celery": { "level": "INFO", "handlers": ["console"], @@ -316,9 +306,10 @@ def __init__(self, tz_name=None): LOGIN_URL = "account_login" LOGIN_REDIRECT_URL = "home" -REST_FRAMEWORK_EXTENSIONS = {"DEFAULT_CACHE_RESPONSE_TIMEOUT": 60 * 60 * 24 * 60} # 60 days - -CACHE_COUNT_TIMEOUT = 60 * 60 * 24 * 60 # 60 days +# Cache settings +CACHE_CLOUDFRONT_DISTRIBUTION_ID = os.getenv("CACHE_CLOUDFRONT_DISTRIBUTION_ID", "") +CACHE_BASICAUTH_USERNAME = os.getenv("CACHE_BASICAUTH_USERNAME", "") +CACHE_BASICAUTH_PASSWORD = os.getenv("CACHE_BASICAUTH_PASSWORD", "") CACHE_HOST = os.getenv("CACHE_HOST", "") if "redis" 
in CACHE_HOST: CACHES = { @@ -338,6 +329,8 @@ def __init__(self, tz_name=None): }, } +BROKER_URL = os.getenv("BROKER_URL", "redis://redis:6379/0") + CENSUS_API_KEY = os.getenv("CENSUS_API_KEY", "") NC_AUTO_IMPORT_DIRECTORY = "/tmp/nc-automated-import" diff --git a/traffic_stops/settings/deploy.py b/traffic_stops/settings/deploy.py index 650780d4..8cb95269 100755 --- a/traffic_stops/settings/deploy.py +++ b/traffic_stops/settings/deploy.py @@ -106,8 +106,6 @@ DATABASE_ETL_USER = "etl" -BROKER_URL = os.getenv("BROKER_URL", "redis://redis:6379/0") - REST_FRAMEWORK = {"DEFAULT_AUTHENTICATION_CLASSES": []} if ENVIRONMENT.upper() == "PRODUCTION": diff --git a/traffic_stops/settings/dev.py b/traffic_stops/settings/dev.py index d9cab990..894b6563 100644 --- a/traffic_stops/settings/dev.py +++ b/traffic_stops/settings/dev.py @@ -22,11 +22,14 @@ EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend" -CELERY_ALWAYS_EAGER = True +CELERY_ALWAYS_EAGER = os.getenv("CELERY_ALWAYS_EAGER", "True") == "True" CELERY_EAGER_PROPAGATES_EXCEPTIONS = True NC_AUTO_IMPORT_MONITORS = ("nc-monitor@example.com",) +ALLOWED_HOSTS = os.getenv("ALLOWED_HOSTS", "") +ALLOWED_HOSTS = ALLOWED_HOSTS.split(",") if ALLOWED_HOSTS else ["*"] + # Special test settings if "test" in sys.argv: PASSWORD_HASHERS = (