Skip to content

Commit

Permalink
enable mass presigned download support (#43)
Browse files Browse the repository at this point in the history
* enable mass presign support

* fix

* fix indent

* update swagger

* update method

* stuff

* check a few of them
  • Loading branch information
devksingh4 authored Sep 10, 2024
1 parent 5cbe789 commit 7b4d9e8
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 6 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ __pycache__
test.json
build
.vscode/settings.json
node_modules/
node_modules/
api/.mise.toml
2 changes: 2 additions & 0 deletions .mise.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[tools]
python = "3.10"
3 changes: 2 additions & 1 deletion api/requirements-testing.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest
moto
moto
pyjwt
4 changes: 3 additions & 1 deletion api/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
aws-lambda-powertools
pydantic[email]
psycopg[binary]
openai
openai
aioboto3
pyjwt
2 changes: 1 addition & 1 deletion api/util/oai.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def oai_get_profile_json(client, resume_text, role_type, role_description):
"role": "user",
"content": resume_text,
},
{
{
"role": "user",
"content": f"I am interested in {role_type} role for jobs involving {role_description}.",
}
Expand Down
20 changes: 20 additions & 0 deletions api/util/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,28 @@
from util.logging import get_logger
import boto3
import re
import aioboto3
import asyncio
logger = get_logger()

async def async_generate_presigned_urls(bucket_name, keys, expiration=14400):
    """
    Concurrently generate presigned GET URLs for several objects in one bucket.

    :param bucket_name: name of the S3 bucket that holds the objects
    :param keys: iterable of object keys to presign
    :param expiration: lifetime of each presigned URL in seconds (default 14400)
    :return: list of presigned URLs, in the same order as ``keys``
    """
    # aioboto3 hands out clients through a Session object; the module-level
    # ``aioboto3.client`` shortcut is not available in current releases.
    session = aioboto3.Session()
    async with session.client('s3') as s3_client:
        # Each call yields an awaitable; nothing runs until they are gathered.
        tasks = [
            s3_client.generate_presigned_url(
                'get_object',
                Params={'Bucket': bucket_name, 'Key': key},
                ExpiresIn=expiration,
            )
            for key in keys
        ]

        # Gather all presigned URL creation tasks asynchronously.
        # asyncio.gather returns results in the order the awaitables were passed.
        presigned_urls = await asyncio.gather(*tasks)

    return presigned_urls

def create_presigned_url_from_s3_url(s3_client, s3_url, expiration=60):
"""
Generate a presigned URL to share an S3 object
Expand Down
36 changes: 35 additions & 1 deletion api/util/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@
from util.postgres import get_db_connection
from util.structs import (
DEFAULT_USER_PROFILE,
MassDownloadResumesRequest,
ProfileSearchRequest,
ResumeUploadPresignedRequest,
StudentProfileDetails,
GenerateProfileRequest,
convert_dict_keys_snake_to_camel,
)
from util.environ import get_run_environment
from util.s3 import create_presigned_url_for_put, create_presigned_url_from_s3_url
from util.s3 import create_presigned_url_for_put, create_presigned_url_from_s3_url, async_generate_presigned_urls
from util.logging import get_logger
from util.secretsmanager import get_parameter_from_sm
from psycopg.rows import dict_row
import asyncio

RUN_ENV = get_run_environment()
logger = get_logger()
Expand Down Expand Up @@ -312,3 +314,35 @@ def recruiter_perform_search():
content_type=content_types.APPLICATION_JSON,
body=search_result,
)

@app.post("/api/v1/recruiter/mass_download")
def recruiter_perform_search():
    """
    Return presigned download URLs for the resumes of the requested usernames.

    The JSON body is validated against ``MassDownloadResumesRequest``. Each
    username maps to the object ``resume_<username>.pdf`` in ``S3_BUCKET``.
    Requests for more than 10 usernames are presigned concurrently through
    ``async_generate_presigned_urls``; smaller requests are presigned one by
    one with ``create_presigned_url_from_s3_url``.

    :return: 200 with a JSON list of URLs, 403 when the payload fails
        validation, 500 on any other error.
    """
    # NOTE(review): this function shares its name with the profile search
    # handler. Routing is driven by the decorator, so both routes register,
    # but a distinct name would be clearer.
    json_body: dict = app.current_event.json_body or {}
    urls = []
    try:
        data = MassDownloadResumesRequest(**json_body).model_dump()
        usernames = data['usernames']
        # Branch on the size of the request. ``urls`` is still empty at this
        # point, so testing its length would never select the asyncio path.
        if len(usernames) > 10:
            logger.info("Using asyncio loop")
            keys = [f"resume_{username}.pdf" for username in usernames]
            urls = asyncio.run(async_generate_presigned_urls(S3_BUCKET, keys))
        else:
            logger.info("Using blocking loop")
            urls = [
                create_presigned_url_from_s3_url(
                    s3_client, f"s3://{S3_BUCKET}/resume_{username}.pdf"
                )
                for username in usernames
            ]
    except pydantic.ValidationError as e:
        return Response(
            status_code=403,
            content_type=content_types.APPLICATION_JSON,
            body={"message": "Error validating payload", "details": str(e)},
        )
    except Exception as e:
        # Top-level boundary of the handler: log the traceback and report a 500.
        logger.error(traceback.format_exc())
        return Response(
            status_code=500,
            content_type=content_types.APPLICATION_JSON,
            body={"message": "Error generating resume download URLs", "details": str(e)},
        )
    return Response(
        status_code=200,
        content_type=content_types.APPLICATION_JSON,
        body=urls,
    )
5 changes: 4 additions & 1 deletion api/util/structs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import List, Literal, Optional
from pydantic import BaseModel, ConfigDict, EmailStr, AnyUrl, Field, HttpUrl
from pydantic import BaseModel, ConfigDict, EmailStr, AnyUrl, Field, HttpUrl, validator
from .oai import LENGTH_LIMIT

class DegreeListing(BaseModel):
Expand Down Expand Up @@ -41,6 +41,9 @@ class GenerateProfileRequest(BaseModel):
roleType: Literal["Internship"] | Literal['Full-Time'] | Literal['Research Assistant']
roleKeywords: List[str]

class MassDownloadResumesRequest(BaseModel):
    """Request payload for the recruiter mass download endpoint."""

    # Usernames whose resumes should be presigned for download.
    usernames: list[str]

DEFAULT_USER_PROFILE = {
"defaultResponse": True,
"username": "someone@illinois.edu",
Expand Down
22 changes: 22 additions & 0 deletions docs/swagger-resume-book.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,25 @@ paths:
type: aws_proxy
uri:
Fn::Sub: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${LambdaFunctionName}/invocations"

# Recruiter mass download: POST route that returns resume PDF download links.
/api/v1/recruiter/mass_download:
post:
summary: Recruiter mode - mass download resume PDFs
operationId: recruiterMassDownloadPDFs
# Only the success response is declared for this operation.
responses:
200:
description: OK

# Every request must pass the CombinedAuthorizer.
security:
- CombinedAuthorizer: []

# Lambda proxy integration: API Gateway forwards the request to the function
# named by the LambdaFunctionName substitution in the uri below.
x-amazon-apigateway-integration:
responses:
default:
statusCode: 200
passthroughBehavior: when_no_match
httpMethod: POST
contentHandling: CONVERT_TO_TEXT
type: aws_proxy
uri:
Fn::Sub: "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/arn:aws:lambda:${AWS::Region}:${AWS::AccountId}:function:${LambdaFunctionName}/invocations"
53 changes: 53 additions & 0 deletions tests/live_integration/test_recruiter_mass_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import requests

def test_unauthenticated(api_client):
    """Sad Path: Test that requesting a mass download when not correctly authenticated returns a failure."""
    # Exercise the endpoint this module covers (POST mass_download), not the
    # student profile route.
    response = api_client.post(
        "/api/v1/recruiter/mass_download", headers={"Authorization": "Bearer invalid"}
    )
    assert response.status_code == 403
    assert response.json() == {
        "Message": "User is not authorized to access this resource with an explicit deny"
    }

def test_student_noaccess(api_client, jwt_generator):
    """Sad Path: a student-role token must be refused by the recruiter mass download endpoint."""
    token = jwt_generator(role="student", env="dev", email="noone@testing.megacorp.com")
    auth_headers = {"Authorization": f"Bearer {token}"}

    response = api_client.post("/api/v1/recruiter/mass_download", headers=auth_headers)

    expected_body = {"Message": "User is not authorized to access this resource"}
    assert response.status_code == 403
    assert response.json() == expected_body


def test_one_profile(api_client, jwt_generator):
    """Happy path: a recruiter requests one resume and can fetch it from the returned URL."""
    token = jwt_generator(role="recruiter", env="dev", email="noone@testing.megacorp.com")
    payload = {"usernames": ["dsingh14@illinois.edu"]}

    response = api_client.post(
        "/api/v1/recruiter/mass_download",
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
    )
    assert response.status_code == 200

    presigned_urls = response.json()
    assert len(presigned_urls) == 1

    # The presigned URL itself must be downloadable.
    download = requests.get(presigned_urls[0])
    assert download.status_code == 200

def test_twenty_profiles(api_client, jwt_generator):
    """Happy path: a recruiter requests twenty resumes and a sample of the returned URLs is downloadable."""
    token = jwt_generator(role="recruiter", env="dev", email="noone@testing.megacorp.com")
    payload = {"usernames": ["dsingh14@illinois.edu"] * 20}

    response = api_client.post(
        "/api/v1/recruiter/mass_download",
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
    )
    assert response.status_code == 200

    presigned_urls = response.json()
    assert len(presigned_urls) == 20

    # Spot-check the first, a middle, and the last URL rather than all twenty.
    for index in (0, 10, 19):
        download = requests.get(presigned_urls[index])
        assert download.status_code == 200

0 comments on commit 7b4d9e8

Please sign in to comment.