From 7b4d9e8df207bf38b023745ad040735b9c408155 Mon Sep 17 00:00:00 2001
From: Dev Singh
Date: Mon, 9 Sep 2024 22:30:34 -0500
Subject: [PATCH] enable mass presigned download support (#43)

* enable mass presign support

* fix

* fix indent

* update swagger

* update method

* stuff

* check a few of them
---
Reviewer notes (text in this section is ignored by `git am`):
- api/util/server.py: the batch-size check compared len(urls), which is
  always 0 at that point, so the asyncio path was unreachable. It now
  checks len(data['usernames']).
- api/util/server.py: the new handler reused the name
  recruiter_perform_search and rebound the search handler's name; it is
  now recruiter_mass_download. Its 500 message no longer says "profile
  search".
- api/util/s3.py: aioboto3 has no module-level client() in current
  releases (the dependency is unpinned); the client is now created from
  an explicit aioboto3.Session().
- tests: test_unauthenticated now hits the mass_download endpoint instead
  of /api/v1/student/profile; docstrings corrected.
- All fixes are one-for-one line replacements, so hunk sizes and the
  diffstat below are unchanged. The blob ids on the "index" lines and the
  commit id above still describe the original commit.
- Leading whitespace and blank context lines were reconstructed from a
  whitespace-collapsed copy, including the whitespace-only change in
  api/util/oai.py. Verify against the target tree before applying.

 .gitignore                                    |  3 +-
 .mise.toml                                    |  2 +
 api/requirements-testing.txt                  |  3 +-
 api/requirements.txt                          |  4 +-
 api/util/oai.py                               |  2 +-
 api/util/s3.py                                | 20 +++++++
 api/util/server.py                            | 36 ++++++++++++-
 api/util/structs.py                           |  5 +-
 docs/swagger-resume-book.yml                  | 22 ++++++++
 .../test_recruiter_mass_download.py           | 53 +++++++++++++++++++
 10 files changed, 144 insertions(+), 6 deletions(-)
 create mode 100644 .mise.toml
 create mode 100644 tests/live_integration/test_recruiter_mass_download.py

diff --git a/.gitignore b/.gitignore
index d87bb2f..85733fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@ __pycache__
 test.json
 build
 .vscode/settings.json
-node_modules/
\ No newline at end of file
+node_modules/
+api/.mise.toml
diff --git a/.mise.toml b/.mise.toml
new file mode 100644
index 0000000..6f3b64a
--- /dev/null
+++ b/.mise.toml
@@ -0,0 +1,2 @@
+[tools]
+python = "3.10"
diff --git a/api/requirements-testing.txt b/api/requirements-testing.txt
index c967137..49635d1 100644
--- a/api/requirements-testing.txt
+++ b/api/requirements-testing.txt
@@ -1,2 +1,3 @@
 pytest
-moto
\ No newline at end of file
+moto
+pyjwt
\ No newline at end of file
diff --git a/api/requirements.txt b/api/requirements.txt
index 135930d..d4f52c4 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -1,4 +1,6 @@
 aws-lambda-powertools
 pydantic[email]
 psycopg[binary]
-openai
\ No newline at end of file
+openai
+aioboto3
+pyjwt
\ No newline at end of file
diff --git a/api/util/oai.py b/api/util/oai.py
index 2c76892..c7e0677 100644
--- a/api/util/oai.py
+++ b/api/util/oai.py
@@ -26,7 +26,7 @@ def oai_get_profile_json(client, resume_text, role_type, role_description):
                 "role": "user",
                 "content": resume_text,
             },
-            {
+            { 
                 "role": "user",
                 "content": f"I am interested in {role_type} role for jobs involving {role_description}.",
             }
diff --git a/api/util/s3.py b/api/util/s3.py
index 23ce674..07c42d6 100644
--- a/api/util/s3.py
+++ b/api/util/s3.py
@@ -2,8 +2,28 @@ from util.logging import get_logger
 import boto3
 import re
+import aioboto3
+import asyncio
 
 logger = get_logger()
 
+async def async_generate_presigned_urls(bucket_name, keys, expiration=14400):
+    async with aioboto3.Session().client('s3') as s3_client:
+        tasks = []
+
+        for key in keys:
+            tasks.append(
+                s3_client.generate_presigned_url(
+                    'get_object',
+                    Params={'Bucket': bucket_name, 'Key': key},
+                    ExpiresIn=expiration
+                )
+            )
+
+        # Gather all presigned URL creation tasks asynchronously
+        presigned_urls = await asyncio.gather(*tasks)
+
+        return presigned_urls
+
 def create_presigned_url_from_s3_url(s3_client, s3_url, expiration=60):
     """
     Generate a presigned URL to share an S3 object
diff --git a/api/util/server.py b/api/util/server.py
index 977e3bb..d6d1fa0 100644
--- a/api/util/server.py
+++ b/api/util/server.py
@@ -16,6 +16,7 @@
 from util.postgres import get_db_connection
 from util.structs import (
     DEFAULT_USER_PROFILE,
+    MassDownloadResumesRequest,
     ProfileSearchRequest,
     ResumeUploadPresignedRequest,
     StudentProfileDetails,
@@ -23,10 +24,11 @@
     convert_dict_keys_snake_to_camel,
 )
 from util.environ import get_run_environment
-from util.s3 import create_presigned_url_for_put, create_presigned_url_from_s3_url
+from util.s3 import create_presigned_url_for_put, create_presigned_url_from_s3_url, async_generate_presigned_urls
 from util.logging import get_logger
 from util.secretsmanager import get_parameter_from_sm
 from psycopg.rows import dict_row
+import asyncio
 
 RUN_ENV = get_run_environment()
 logger = get_logger()
@@ -312,3 +314,35 @@ def recruiter_perform_search():
         content_type=content_types.APPLICATION_JSON,
         body=search_result,
     )
+
+@app.post("/api/v1/recruiter/mass_download")
+def recruiter_mass_download():
+    json_body: dict = app.current_event.json_body or {}
+    urls = []
+    try:
+        data = MassDownloadResumesRequest(**json_body).model_dump()
+        if len(data['usernames']) > 10:
+            logger.info("Using asyncio loop")
+            keys = [f"resume_{username}.pdf" for username in data['usernames']]
+            urls = asyncio.run(async_generate_presigned_urls(S3_BUCKET, keys))
+        else:
+            logger.info("Using blocking loop")
+            urls = [create_presigned_url_from_s3_url(s3_client, f"s3://{S3_BUCKET}/resume_{username}.pdf") for username in data['usernames'] ]
+    except pydantic.ValidationError as e:
+        return Response(
+            status_code=403,
+            content_type=content_types.APPLICATION_JSON,
+            body={"message": "Error validating payload", "details": str(e)},
+        )
+    except Exception as e:
+        logger.error(traceback.format_exc())
+        return Response(
+            status_code=500,
+            content_type=content_types.APPLICATION_JSON,
+            body={"message": "Error generating resume download URLs", "details": str(e)},
+        )
+    return Response(
+        status_code=200,
+        content_type=content_types.APPLICATION_JSON,
+        body=urls,
+    )
\ No newline at end of file
diff --git a/api/util/structs.py b/api/util/structs.py
index 98125f2..2dcca4f 100644
--- a/api/util/structs.py
+++ b/api/util/structs.py
@@ -1,5 +1,5 @@
 from typing import List, Literal, Optional
-from pydantic import BaseModel, ConfigDict, EmailStr, AnyUrl, Field, HttpUrl
+from pydantic import BaseModel, ConfigDict, EmailStr, AnyUrl, Field, HttpUrl, validator
 from .oai import LENGTH_LIMIT
 
 class DegreeListing(BaseModel):
@@ -41,6 +41,9 @@ class GenerateProfileRequest(BaseModel):
     roleType: Literal["Internship"] | Literal['Full-Time'] | Literal['Research Assistant']
     roleKeywords: List[str]
 
+class MassDownloadResumesRequest(BaseModel):
+    usernames: List[str]
+
 DEFAULT_USER_PROFILE = {
     "defaultResponse": True,
     "username": "someone@illinois.edu",
diff --git a/docs/swagger-resume-book.yml b/docs/swagger-resume-book.yml
index 60784d5..f45311e 100644
--- a/docs/swagger-resume-book.yml
+++ b/docs/swagger-resume-book.yml
@@ -184,3 +184,25 @@ paths:
         type: aws_proxy
         uri:
           Fn::Sub: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${LambdaFunctionName}/invocations"
+
+  /api/v1/recruiter/mass_download:
+    post:
+      summary: Recruiter mode - mass download resume PDFs
+      operationId: recruiterMassDownloadPDFs
+      responses:
+        200:
+          description: OK
+
+      security:
+        - CombinedAuthorizer: []
+
+      x-amazon-apigateway-integration:
+        responses:
+          default:
+            statusCode: 200
+        passthroughBehavior: when_no_match
+        httpMethod: POST
+        contentHandling: CONVERT_TO_TEXT
+        type: aws_proxy
+        uri:
+          Fn::Sub: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${LambdaFunctionName}/invocations"
diff --git a/tests/live_integration/test_recruiter_mass_download.py b/tests/live_integration/test_recruiter_mass_download.py
new file mode 100644
index 0000000..190df45
--- /dev/null
+++ b/tests/live_integration/test_recruiter_mass_download.py
@@ -0,0 +1,53 @@
+import requests
+
+def test_unauthenticated(api_client):
+    """Sad Path: Test that requesting a mass download when not correctly authenticated returns a failure."""
+    response = api_client.post(
+        "/api/v1/recruiter/mass_download", headers={"Authorization": "Bearer invalid"}
+    )
+    assert response.status_code == 403
+    assert response.json() == {
+        "Message": "User is not authorized to access this resource with an explicit deny"
+    }
+
+def test_student_noaccess(api_client, jwt_generator):
+    """Sad Path: Test that requesting a mass download when authenticated as a student returns a failure."""
+    jwt = jwt_generator(role="student", env="dev", email="noone@testing.megacorp.com")
+    response = api_client.post(
+        "/api/v1/recruiter/mass_download", headers={"Authorization": f"Bearer {jwt}"}
+    )
+    assert response.status_code == 403
+    assert response.json() == {
+        "Message": "User is not authorized to access this resource"
+    }
+
+
+def test_one_profile(api_client, jwt_generator):
+    """Happy path: test that we can download one profile."""
+    jwt = jwt_generator(role="recruiter", env="dev", email="noone@testing.megacorp.com")
+    response = api_client.post(
+        "/api/v1/recruiter/mass_download", headers={"Authorization": f"Bearer {jwt}"},
+        json={"usernames": ["dsingh14@illinois.edu"]}
+    )
+    assert response.status_code == 200
+    rjson = response.json()
+    assert len(rjson) == 1
+    s3resp = requests.get(rjson[0])
+    assert s3resp.status_code == 200
+
+def test_twenty_profiles(api_client, jwt_generator):
+    """Happy path: test that we can download twenty profiles (more than 10, so the asyncio path is used)."""
+    jwt = jwt_generator(role="recruiter", env="dev", email="noone@testing.megacorp.com")
+    response = api_client.post(
+        "/api/v1/recruiter/mass_download", headers={"Authorization": f"Bearer {jwt}"},
+        json={"usernames": ["dsingh14@illinois.edu"] * 20}
+    )
+    assert response.status_code == 200
+    rjson = response.json()
+    assert len(rjson) == 20
+    s3resp = requests.get(rjson[0])
+    assert s3resp.status_code == 200
+    s3resp = requests.get(rjson[10])
+    assert s3resp.status_code == 200
+    s3resp = requests.get(rjson[19])
+    assert s3resp.status_code == 200
\ No newline at end of file