From e7b416ed01d722b7bb5211c4e0f8d4e431773f06 Mon Sep 17 00:00:00 2001
From: nutrina
Date: Fri, 2 Aug 2024 18:40:59 +0300
Subject: [PATCH] feat: enable the build of the verifier docker image in the
 workflows (#650)

* feat: enable the build of the verifier docker image in the workflows

* feat: update verifier, make issuer mandatory

* feat: rework authentication to use verifier service for payload validation

* fix: adjusting test cases + making fixes

* comment invalid workflow config

* add verifier service

* update load balancer pattern

* fix: test_generic action

* enable uptime robot dry run

---------

Co-authored-by: larisa17
Co-authored-by: Gerald Iakobinyi-Pich
---
 .../workflows/build_and_deploy_generic.yml |  26 ++
 .github/workflows/test_generic.yml         |   5 +-
 api/ceramic_cache/api/v1.py                |  63 +--
 .../test/test_authenticate_v1.py           |  40 +-
 api/scorer/settings/base.py                |   2 +
 infra/aws/index.ts                         |  47 +++
 infra/aws/verifier.ts                      | 369 ++++++++++++++++++
 verifier/src/index.js                      |  15 +-
 8 files changed, 523 insertions(+), 44 deletions(-)
 create mode 100644 infra/aws/verifier.ts

diff --git a/.github/workflows/build_and_deploy_generic.yml b/.github/workflows/build_and_deploy_generic.yml
index 436808c0e..88c722b3b 100644
--- a/.github/workflows/build_and_deploy_generic.yml
+++ b/.github/workflows/build_and_deploy_generic.yml
@@ -92,6 +92,31 @@ jobs:
       dockerfile_name: ./indexer/Dockerfile
       build_dir: ./indexer/
 
+  docker-verifier:
+    name: Build and push docker image for verifier
+    runs-on: ubuntu-latest
+    steps:
+      - name: Load secret
+        id: op-load-secret
+        uses: 1password/load-secrets-action@v1
+        with:
+          export-env: true
+        env:
+          OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
+          AWS_ACCESS_KEY_ID: op://DevOps/passport-scorer-${{ inputs.environment }}-secrets/ci/AWS_ACCESS_KEY_ID
+          AWS_SECRET_ACCESS_KEY: op://DevOps/passport-scorer-${{ inputs.environment }}-secrets/ci/AWS_SECRET_ACCESS_KEY
+
+      - id: build_and_push_docker_image
+        uses: passportxyz/gh-workflows/.github/actions/build_and_push@v1
+        with:
+          refspec: ${{ inputs.refspec }}
+          docker_tag: ${{ inputs.docker_tag }}
+          ecr_repository_name: passport-verifier
+          aws_region: us-west-2
+          aws_access_key_id: ${{ env.AWS_ACCESS_KEY_ID }}
+          aws_secret_access_key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+          dockerfile_name: ./verifier/Dockerfile
+          build_dir: ./verifier/
+
   deploy_preview:
     name: Preview - Deploying AWS Infra
     runs-on: ubuntu-latest
@@ -149,6 +174,7 @@
       docker-ecs,
       docker-indexer,
       docker-lambda,
+      docker-verifier,
       deploy_preview,
       deploy_confirm,
     ]
diff --git a/.github/workflows/test_generic.yml b/.github/workflows/test_generic.yml
index ca05c3122..20b06e8da 100644
--- a/.github/workflows/test_generic.yml
+++ b/.github/workflows/test_generic.yml
@@ -11,13 +11,10 @@ on:
       type: string
       required: true
     uptime-robot-monitor-dry-run:
-      type: choice
+      type: string
       description: "Set to '--dry-run' to not actually create monitors"
      default: ""
       required: false
-      options:
-        - ""
-        - "--dry-run"
 
 jobs:
   test:
diff --git a/api/ceramic_cache/api/v1.py b/api/ceramic_cache/api/v1.py
index a2403a2d9..adc6b09c4 100644
--- a/api/ceramic_cache/api/v1.py
+++ b/api/ceramic_cache/api/v1.py
@@ -4,10 +4,7 @@
 from datetime import timedelta
 from typing import Any, Dict, List, Optional, Type
 
-import api_logging as logging
-import tos.api
-import tos.schema
-from account.models import Account, Community, Nonce
+import requests
 from asgiref.sync import async_to_sync
 from django.conf import settings
 from django.contrib.auth import get_user_model
@@ -24,6 +21,12 @@
 # from ninja_jwt.schema import RefreshToken
 from ninja_jwt.settings import api_settings
 from ninja_jwt.tokens import RefreshToken, Token, TokenError
+
+import api_logging as logging
+import tos.api
+import tos.schema
+from account.models import Account, Community, Nonce
+from ceramic_cache.utils import get_utc_time
 from registry.api.utils import (
     is_valid_address,
 )
@@ -42,15 +45,13 @@
 from stake.api import handle_get_gtc_stake
 from stake.schema import GetSchemaResponse
 
-from ceramic_cache.utils import get_utc_time
-
 from ..exceptions import (
     InternalServerException,
     InvalidDeleteCacheRequestException,
     TooManyStampsException,
 )
 from ..models import CeramicCache
-from ..utils import validate_dag_jws_payload, verify_jws
+from ..utils import validate_dag_jws_payload
 from .schema import (
     AccessTokenResponse,
     CacaoVerifySubmit,
@@ -548,42 +549,50 @@ def handle_authenticate(payload: CacaoVerifySubmit) -> AccessTokenResponse:
         if not validate_dag_jws_payload({"nonce": payload.nonce}, payload.payload):
             log.error("Failed to validate nonce: '%s'", payload.nonce)
             raise FailedVerificationException(detail="Invalid nonce or payload!")
-
     except Exception as exc:
         log.error("Failed authenticate request: '%s'", payload.dict(), exc_info=True)
         raise FailedVerificationException(detail="Invalid nonce or payload!") from exc
 
     try:
-        try:
-            verify_jws(payload.dict())
-
-        except Exception as exc:
-            log.error(
-                "Failed to authenticate request (verify_jws failed): '%s'",
-                payload.dict(),
-                exc_info=True,
-            )
-            raise FailedVerificationException(
-                detail=f"Failed to authenticate request: {str(exc)}"
-            ) from exc
+        res = requests.post(
+            settings.VERIFIER_URL,
+            json={
+                "signatures": payload.signatures,
+                "payload": payload.payload,
+                "cid": payload.cid,
+                "cacao": payload.cacao,
+                "issuer": payload.issuer,
+            },
+        )
 
-        token = DbCacheToken()
-        token["did"] = payload.issuer
+        if res.status_code == 200:
+            data = res.json()
+            if data.get("status") == "ok":
+                token = DbCacheToken()
+                token["did"] = payload.issuer
+                return {
+                    "access": str(token.access_token),
+                }
 
-        return {
-            "access": str(token.access_token),
-        }
+        log.error(
+            "Failed to validate authentication payload (jws)! Response: %s\n%s",
+            res,
+            res.json(),
+        )
+        raise FailedVerificationException(detail=f"JWS validation failed: {res.json()}")
 
     except APIException:
         # re-raise API exceptions
         raise
     except Exception as esc:
         log.error(
-            "Failed authenticate request (verify_jws failed): '%s'",
+            "Failed to authenticate request (verify_jws failed): '%s'",
             payload.dict(),
             exc_info=True,
         )
-        raise APIException(detail=f"Failed authenticate request: {str(esc)}") from esc
+        raise APIException(
+            detail=f"Failed to authenticate request: {str(esc)}"
+        ) from esc
 
 
 def get_detailed_score_response_for_address(
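Reviewer note: after this change, `handle_authenticate` treats the verifier as a plain HTTP contract — anything other than a 200 response carrying `{"status": "ok"}` fails the login. A minimal TypeScript sketch of that contract, for reference (field names are taken from the diff; the URL is the local-dev default added to `api/scorer/settings/base.py` further down; the timeout is an assumption — the Django code above does not set one):

```typescript
// Sketch of the verifier contract that handle_authenticate() now relies on.
type VerifyRequest = {
  signatures: unknown[]; // JWS signatures, passed through untouched
  payload: string;       // dag-jws payload
  cid: number[];         // CID bytes; decoded server-side via CID.decode
  cacao: number[];       // CACAO block bytes; decoded via Cacao.fromBlockBytes
  issuer: string;        // now mandatory; a missing issuer yields HTTP 400
};

type VerifyResponse =
  | { status: "ok" }
  | { status: "failed"; error: string };

async function verifyWithService(req: VerifyRequest): Promise<boolean> {
  const res = await fetch("http://localhost:8001/verifier/verify", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(req),
    signal: AbortSignal.timeout(5_000), // assumption: bound the call; the diff leaves it unbounded
  });
  if (res.status !== 200) return false;
  const data = (await res.json()) as VerifyResponse;
  return data.status === "ok";
}
```

Note that only `status === "ok"` is load-bearing: the service replies `"failed"` while the test mocks below use `"failure"`, and both pass because the Django side never inspects the failure string.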
diff --git a/api/ceramic_cache/test/test_authenticate_v1.py b/api/ceramic_cache/test/test_authenticate_v1.py
index 7154051e4..6b0ad2df7 100644
--- a/api/ceramic_cache/test/test_authenticate_v1.py
+++ b/api/ceramic_cache/test/test_authenticate_v1.py
@@ -3,10 +3,11 @@
 from collections import namedtuple
 
 import pytest
-from account.models import Nonce
 from django.test import Client
 from ninja_jwt.tokens import AccessToken
 
+from account.models import Nonce
+
 pytestmark = pytest.mark.django_db
 
 client = Client()
@@ -39,14 +40,19 @@ def test_authenticate_validates_payload(self, mocker):
         """
         We expect that the authenticate request:
         1. validates the payload against the nonce
-        2. makes a validation request for checkting the jws to verify_jws, and verify_jws does not throw
+        2. makes a validation request for checking the jws to the verifier, and the verifier returns success
 
         If both are ok, the test should succeed
         """
-        MockedRequestResponse = namedtuple("MockedRequestResponse", "status_code")
+
+        class MockedRequestResponse:
+            status_code = 200
+
+            def json(self):
+                return {"status": "ok"}
+
         with mocker.patch(
-            "ceramic_cache.api.v1.verify_jws",
-            return_value=None,
+            "ceramic_cache.api.v1.requests.post", return_value=MockedRequestResponse()
         ):
             with mocker.patch(
                 "ceramic_cache.api.v1.validate_dag_jws_payload", return_value=True
@@ -122,14 +128,19 @@ def test_authenticate_fails_when_validating_jws_fails(self, mocker):
         """
         We expect that the authenticate request:
         1. validates the payload against the nonce
-        2. validates the jws with verify_jws
+        2. validates the jws with the verifier
 
         The test should fail at step 2 if the validation returns anything other than 200
         """
 
+        class MockedRequestResponse:
+            status_code = 200
+
+            def json(self):
+                return {"status": "failure", "error": "something went wrong"}
+
         with mocker.patch(
-            "ceramic_cache.api.v1.verify_jws",
-            side_effect=Exception("JWS validation failed"),
+            "ceramic_cache.api.v1.requests.post", return_value=MockedRequestResponse()
         ):
             with mocker.patch(
                 "ceramic_cache.api.v1.validate_dag_jws_payload", return_value=True
@@ -153,14 +164,19 @@ def test_authenticate_fails_when_validating_jws_throws(self, mocker):
         """
         We expect that the authenticate request:
         1. validates the payload against the nonce
-        2. validates the jws with verify_jws
+        2. validates the jws with the verifier
 
         The test should fail at step 2 if the validation throws
         """
 
+        class MockedRequestResponse:
+            status_code = 200
+
+            def json(self):
+                return {"status": "failure", "error": "something went wrong"}
+
         with mocker.patch(
-            "ceramic_cache.api.v1.verify_jws",
-            side_effect=Exception("this is broken"),
+            "ceramic_cache.api.v1.requests.post", return_value=MockedRequestResponse()
         ):
             with mocker.patch(
                 "ceramic_cache.api.v1.validate_dag_jws_payload", return_value=True
@@ -177,4 +193,4 @@ def test_authenticate_fails_when_validating_jws_throws(self, mocker):
 
         assert auth_response.status_code == 400
         assert "detail" in json_data
-        assert json_data["detail"].startswith("Failed to authenticate request")
+        assert json_data["detail"].startswith("JWS validation failed")
diff --git a/api/scorer/settings/base.py b/api/scorer/settings/base.py
index b89e55be7..1fc24775d 100644
--- a/api/scorer/settings/base.py
+++ b/api/scorer/settings/base.py
@@ -453,3 +453,5 @@
     "127.0.0.1",
     # ...
 ]
+
+VERIFIER_URL = env("VERIFIER_URL", default="http://localhost:8001/verifier/verify")
diff --git a/infra/aws/index.ts b/infra/aws/index.ts
index 6c0cb630b..45d9295dd 100644
--- a/infra/aws/index.ts
+++ b/infra/aws/index.ts
@@ -21,6 +21,7 @@
 import { createScheduledTask } from "../lib/scorer/scheduledTasks";
 import { secretsManager } from "infra-libs";
 import * as op from "@1password/op-js";
+import { createVerifierService } from "./verifier";
 
 // The following vars are not allowed to be undefined, hence the `${...}` magic
 
@@ -57,6 +58,7 @@ const publicDataDomain =
 const current = aws.getCallerIdentity({});
 const regionData = aws.getRegion({});
 
+
 export const dockerGtcPassportScorerImage = pulumi
   .all([current, regionData])
   .apply(
@@ -78,6 +80,13 @@ export const dockerGtcStakingIndexerImage = pulumi
       `${acc.accountId}.dkr.ecr.${region.id}.amazonaws.com/passport-indexer:${DOCKER_IMAGE_TAG}`
   );
 
+export const verifierDockerImage = pulumi
+  .all([current, regionData])
+  .apply(
+    ([acc, region]) =>
+      `${acc.accountId}.dkr.ecr.${region.id}.amazonaws.com/passport-verifier:${DOCKER_IMAGE_TAG}`
+  );
+
 const redashDbUsername = op.read.parse(
   `op://DevOps/passport-scorer-${stack}-env/ci/REDASH_DB_USER`
 );
@@ -618,6 +627,10 @@ const apiEnvironment = [
     name: "ALLOWED_HOSTS",
     value: JSON.stringify([domain, "*"]),
   },
+  {
+    name: "VERIFIER_URL",
+    value: "http://core-alb.private.gitcoin.co/verifier/verify",
+  },
 ].sort(secretsManager.sortByName);
 
 const apiSecrets = secretsManager.syncSecretsAndGetRefs({
@@ -1478,6 +1491,10 @@ const lambdaSettings = {
       name: "SCORER_SERVER_SSM_ARN",
       value: scorerSecret.arn,
     },
+    {
+      name: "VERIFIER_URL",
+      value: "http://core-alb.private.gitcoin.co/verifier/verify",
+    },
   ].sort(secretsManager.sortByName),
   roleAttachments: httpRoleAttachments,
   role: httpLambdaRole,
@@ -1485,6 +1502,7 @@
   alb: alb,
 };
 
+// Create alarms for the load balancer
 createLoadBalancerAlarms(
   "scorer-service",
   alb.arnSuffix,
@@ -1492,6 +1510,7 @@
   pagerdutyTopic
 );
 
+// Manage Lambda services
 buildHttpLambdaFn(
   {
     ...lambdaSettings,
@@ -1633,3 +1652,31 @@ buildQueueLambdaFn({
   role: queueLambdaRole,
   queue: rescoreQueue,
 });
+
+// VERIFIER
+const privateAlbHttpListenerArn = coreInfraStack.getOutput(
+  "privateAlbHttpListenerArn"
+);
+const privateAlbArnSuffix = coreInfraStack.getOutput(
+  "privateAlbArnSuffix"
+);
+
+const verifier = pulumi
+  .all([verifierDockerImage])
+  .apply(([_verifierDockerImage]) =>
+    createVerifierService({
+      vpcId: vpcID as pulumi.Output<string>,
+      albListenerArn: privateAlbHttpListenerArn as pulumi.Output<string>,
+      privateAlbArnSuffix:
+        privateAlbArnSuffix as pulumi.Output<string>,
+      albPriorityRule: 1011,
+      pathPatterns: ["/verifier/*"],
+      clusterArn: cluster.arn,
+      clusterName: cluster.name,
+      dockerImage: _verifierDockerImage,
+      vpcPrivateSubnets: vpcPrivateSubnetIds as pulumi.Output<string[]>,
+      snsTopicArn: pagerdutyTopic.arn,
+    })
+  );
+
+export const verifierTaskArn = verifier.task.arn;
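Reviewer note: the `VERIFIER_URL` value injected above only resolves because of the ALB listener rule created in the new module below — the private ALB forwards any path matching `/verifier/*` to the verifier target group. A tiny illustration of that routing assumption (simplified; real ALB pattern syntax also supports `?` and has its own edge cases):

```typescript
// Simplified sketch of ALB path-pattern matching: "*" matches any sequence.
// This is an illustration of the routing assumption, not an AWS API.
function matchesAlbPathPattern(path: string, pattern: string): boolean {
  const escape = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp("^" + pattern.split("*").map(escape).join(".*") + "$");
  return regex.test(path);
}

// The URL injected into the API environment lands on a matching path:
matchesAlbPathPattern("/verifier/verify", "/verifier/*"); // true
matchesAlbPathPattern("/verifier/health", "/verifier/*"); // true
matchesAlbPathPattern("/registry/score", "/verifier/*");  // false
```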
diff --git a/infra/aws/verifier.ts b/infra/aws/verifier.ts
new file mode 100644
index 000000000..c8d9253be
--- /dev/null
+++ b/infra/aws/verifier.ts
@@ -0,0 +1,369 @@
+import * as pulumi from "@pulumi/pulumi";
+import * as aws from "@pulumi/aws";
+
+type StackType = "review" | "staging" | "production";
+export const stack: StackType = pulumi.getStack() as StackType;
+
+const DEFAULT_TAGS = {
+  Name: "verifier",
+  Environment: stack,
+  Project: "passport-scorer",
+};
+
+const serviceResources = {
+  review: {
+    memory: 512, // 512 MiB
+    cpu: 256, // 0.25 vCPU
+  },
+  staging: {
+    memory: 1024, // 1 GB
+    cpu: 512, // 0.5 vCPU
+  },
+  production: {
+    memory: 1024, // 1 GB
+    cpu: 512, // 0.5 vCPU
+  },
+};
+
+const logsRetention = {
+  review: 1,
+  staging: 7,
+  production: 30,
+};
+
+export const createVerifierService = ({
+  vpcId,
+  albListenerArn,
+  privateAlbArnSuffix,
+  albPriorityRule,
+  pathPatterns,
+  clusterArn,
+  clusterName,
+  dockerImage,
+  vpcPrivateSubnets,
+  snsTopicArn,
+}: {
+  vpcId: pulumi.Output<string>;
+  albListenerArn: pulumi.Output<string>;
+  privateAlbArnSuffix: pulumi.Output<string>;
+  albPriorityRule: number;
+  pathPatterns: string[];
+  clusterArn: pulumi.Output<string>;
+  clusterName: pulumi.Output<string>;
+  dockerImage: string;
+  vpcPrivateSubnets: pulumi.Output<string[]>;
+  snsTopicArn: pulumi.Output<string>;
+}) => {
+  // Service Role
+  const serviceRole = new aws.iam.Role("verifier-ecs-role", {
+    assumeRolePolicy: JSON.stringify({
+      Version: "2012-10-17",
+      Statement: [
+        {
+          Sid: "EcsAssume",
+          Action: "sts:AssumeRole",
+          Effect: "Allow",
+          Principal: {
+            Service: "ecs-tasks.amazonaws.com",
+          },
+        },
+      ],
+    }),
+    managedPolicyArns: [
+      "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy",
+    ],
+    tags: {
+      ...DEFAULT_TAGS,
+    },
+  });
+
+  // Log Group
+  const serviceLogGroup = new aws.cloudwatch.LogGroup("verifier", {
+    name: "verifier",
+    retentionInDays: logsRetention[stack],
+    tags: {
+      ...DEFAULT_TAGS,
+    },
+  });
+
+  // Security Group
+  const serviceSG = new aws.ec2.SecurityGroup(`verifier`, {
+    name: `verifier`,
+    vpcId: vpcId,
+    description: `Security Group for verifier service.`,
+    tags: {
+      ...DEFAULT_TAGS,
+      Name: `verifier`,
+    },
+  });
+
+  const sgIngressRule80 = new aws.ec2.SecurityGroupRule(
+    `verifier-80`,
+    {
+      securityGroupId: serviceSG.id,
+      type: "ingress",
+      fromPort: 80,
+      toPort: 80,
+      protocol: "tcp",
+      cidrBlocks: ["10.0.0.0/16"], // For now allow from VPC
+    },
+    {
+      dependsOn: [serviceSG],
+    }
+  );
+
+  // Allow all outbound traffic
+  const sgEgressRule = new aws.ec2.SecurityGroupRule(
+    `verifier-all`,
+    {
+      securityGroupId: serviceSG.id,
+      type: "egress",
+      fromPort: 0,
+      toPort: 0,
+      protocol: "-1",
+      cidrBlocks: ["0.0.0.0/0"],
+    },
+    {
+      dependsOn: [serviceSG],
+    }
+  );
+
+  // ALB Target Group & Listener
+  const albTargetGroup = new aws.lb.TargetGroup(`verifier`, {
+    name: `verifier`,
+    vpcId: vpcId,
+    healthCheck: {
+      enabled: true,
+      healthyThreshold: 3,
+      interval: 30,
+      matcher: "200",
+      path: "/verifier/health",
+      port: "traffic-port",
+      protocol: "HTTP",
+      timeout: 5,
+      unhealthyThreshold: 5,
+    },
+    port: 80,
+    protocol: "HTTP",
+    targetType: "ip",
+    tags: {
+      ...DEFAULT_TAGS,
+      Name: `verifier`,
+    },
+  });
+
+  const albListenerRule = new aws.lb.ListenerRule(`verifier-http`, {
+    listenerArn: albListenerArn,
+    priority: albPriorityRule, // This needs to be greater than the priority number for passport-scroll-badge-service
+    actions: [
+      {
+        type: "forward",
+        targetGroupArn: albTargetGroup.arn,
+      },
+    ],
+    conditions: [
+      {
+        pathPattern: {
+          values: pathPatterns,
+        },
+      },
+    ],
+    tags: {
+      ...DEFAULT_TAGS,
+      Name: `verifier-http`,
+    },
+  });
+
+  // Task Definition & service
+
+  const containerDefinitions = JSON.stringify([
+    {
+      name: "verifier",
+      image: dockerImage,
+      cpu: serviceResources[stack]["cpu"],
+      memory: serviceResources[stack]["memory"],
+      links: [],
+      essential: true,
+      portMappings: [
+        {
+          containerPort: 80,
+          hostPort: 80,
+        },
+      ],
+      logConfiguration: {
+        logDriver: "awslogs",
+        options: {
+          "awslogs-group": "verifier", // matches the LogGroup name above
"awslogs-region": "us-west-2", // `${regionId}`, + "awslogs-create-group": "true", + "awslogs-stream-prefix": "verifier", + }, + }, + mountPoints: [], + volumesFrom: [], + environment: [ + { + name: "VERIFIER_PORT", + value: "80", + }, + ], + secrets: [], + }, + ]); + + const taskDefinition = new aws.ecs.TaskDefinition(`verifier`, { + family: `verifier`, + containerDefinitions, + executionRoleArn: serviceRole.arn, + cpu: serviceResources[stack]["cpu"], + memory: serviceResources[stack]["memory"], + networkMode: "awsvpc", + requiresCompatibilities: ["FARGATE"], + tags: { + ...DEFAULT_TAGS, + EcsService: `verifier`, + }, + }); + + const service = new aws.ecs.Service( + `verifier`, + { + cluster: clusterArn, + desiredCount: stack === "production" ? 2 : 1, + enableEcsManagedTags: true, + enableExecuteCommand: false, + launchType: "FARGATE", + loadBalancers: [ + { + containerName: "verifier", + containerPort: 80, + targetGroupArn: albTargetGroup.arn, + }, + ], + name: `verifier`, + networkConfiguration: { + subnets: vpcPrivateSubnets, + securityGroups: [serviceSG.id], + }, + propagateTags: "TASK_DEFINITION", + taskDefinition: taskDefinition.arn, + tags: { + ...DEFAULT_TAGS, + Name: `verifier`, + }, + }, + { + dependsOn: [albTargetGroup, taskDefinition], + } + ); + + // Auto Scaling + + const ecsAutoScalingTarget = new aws.appautoscaling.Target( + "autoscaling_target", + { + maxCapacity: 10, + minCapacity: 1, + resourceId: pulumi.interpolate`service/${clusterName}/${service.name}`, + scalableDimension: "ecs:service:DesiredCount", + serviceNamespace: "ecs", + } + ); + + const ecsAutoScalingPolicy = new aws.appautoscaling.Policy( + "autoscaling-policy", + { + policyType: "TargetTrackingScaling", + resourceId: ecsAutoScalingTarget.resourceId, + scalableDimension: ecsAutoScalingTarget.scalableDimension, + serviceNamespace: ecsAutoScalingTarget.serviceNamespace, + targetTrackingScalingPolicyConfiguration: { + predefinedMetricSpecification: { + predefinedMetricType: "ECSServiceAverageCPUUtilization", + }, + targetValue: 50, + scaleInCooldown: 300, + scaleOutCooldown: 300, + }, + } + ); + + // Alerts + // Send an alert on alb target 500 + const metricNamespace = "AWS/ApplicationELB"; + + const http5xxTargetAlarm = new aws.cloudwatch.MetricAlarm( + `HTTP-Target-5XX-verifier`, + { + tags: { name: `HTTP-Target-5XX-verifier` }, + name: `HTTP-Target-5XX-verifier`, + alarmActions: [snsTopicArn], + okActions: [snsTopicArn], + comparisonOperator: "GreaterThanThreshold", + datapointsToAlarm: 3, + evaluationPeriods: 5, + metricQueries: [ + { + id: "m1", + metric: { + metricName: "RequestCount", + dimensions: { + LoadBalancer: privateAlbArnSuffix, + TargetGroup: albTargetGroup.arnSuffix, + }, + namespace: metricNamespace, + period: 60, + stat: "Sum", + }, + }, + { + id: "m2", + metric: { + metricName: "HTTPCode_Target_5XX_Count", + dimensions: { + LoadBalancer: privateAlbArnSuffix, + TargetGroup: albTargetGroup.arnSuffix, + }, + namespace: metricNamespace, + period: 60, + stat: "Sum", + }, + }, + { + expression: "m2 / m1", + id: "e1", + label: "Percent of target 5XX errors", + returnData: true, + }, + ], + threshold: 0.1, + } + ); + + // Alert on task count + const runningTaskCountAlarm = new aws.cloudwatch.MetricAlarm( + `RunningTaskCount-verifier`, + { + tags: { name: `RunningTaskCount-verifier` }, + alarmActions: [snsTopicArn], + okActions: [snsTopicArn], + comparisonOperator: "GreaterThanThreshold", + datapointsToAlarm: 1, + dimensions: { + ClusterName: clusterName, + ServiceName: service.name, + }, + 
diff --git a/verifier/src/index.js b/verifier/src/index.js
index 3759cf7c1..c03c44f41 100644
--- a/verifier/src/index.js
+++ b/verifier/src/index.js
@@ -11,13 +11,26 @@ import("dids").then((dids) => {
   const Cacao = didtools.Cacao;
   const CID = multiformats.CID;
 
-  app.post("/verify", (req, res) => {
+  app.get("/verifier/health", (req, res) => {
+    res.json({ health: "ok" });
+  });
+
+  app.post("/verifier/verify", (req, res) => {
     const jws_restored = {
       signatures: req.body.signatures,
       payload: req.body.payload,
       cid: CID.decode(new Uint8Array(req.body.cid)),
     };
 
+    if (!req.body.issuer) {
+      res.status(400);
+      const msg =
+        "Verification failed, 'issuer' is required in body!";
+      console.error(msg);
+      res.json({ status: "failed", error: msg });
+      return;
+    }
+
     Cacao.fromBlockBytes(new Uint8Array(req.body.cacao)).then((cacao) => {
       const did = new DID({
         resolver: KeyResolver.getResolver(),
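Editorial note on the hunk above: the new issuer check runs after `CID.decode`, so a request with a malformed `cid` throws before validation and surfaces as an unhandled error instead of a 400. A sketch of the same handler with validation hoisted first — it reuses the in-scope `app` and `CID` from the surrounding module and is a suggestion, not the shipped code:

```typescript
app.post("/verifier/verify", (req, res) => {
  // Validate the body before any decoding, so malformed input gets a clean 400.
  if (!req.body.issuer) {
    const msg = "Verification failed, 'issuer' is required in body!";
    console.error(msg);
    return res.status(400).json({ status: "failed", error: msg });
  }

  // Only now reconstruct the JWS; CID.decode can throw on bad bytes.
  const jws_restored = {
    signatures: req.body.signatures,
    payload: req.body.payload,
    cid: CID.decode(new Uint8Array(req.body.cid)),
  };

  // ...continue with Cacao.fromBlockBytes + DID verification as in the diff.
});
```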