From ab7e2a9cad43b91702de3c70c6123c403c29d0eb Mon Sep 17 00:00:00 2001 From: Petros Kalos Date: Thu, 26 Sep 2024 14:20:23 +0300 Subject: [PATCH] migrate local server to FastAPI (#1577) ### Feature or Bugfix - Refactoring ### Detail * Migrate local server Flask to FastAPI * Upgrade and align versions of important libraries (aws-cdk-lib/boto3) * Simplify requirements files (removed explicitly defined deps that were not required or were implicitly pulled by other packages). ### Testing Penting succesful run on dev pipeline ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../dataall/base/cdkproxy/requirements.txt | 13 +-- backend/dataall/base/context.py | 2 +- .../data_pipeline_blueprint/requirements.txt | 3 +- backend/local_graphql_server.py | 87 ++++++++----------- backend/requirements.txt | 8 +- deploy/requirements.txt | 8 +- deploy/stacks/cdk_nag_exclusions.py | 4 + deploy/stacks/pipeline.py | 6 +- docker-compose.yaml | 2 +- tests/client.py | 35 ++++---- tests/conftest.py | 4 +- tests/requirements.txt | 1 + tests_new/integration_tests/requirements.txt | 2 +- 13 files changed, 81 insertions(+), 94 deletions(-) diff --git a/backend/dataall/base/cdkproxy/requirements.txt b/backend/dataall/base/cdkproxy/requirements.txt index 2c9be32f7..894243316 100644 --- a/backend/dataall/base/cdkproxy/requirements.txt +++ b/backend/dataall/base/cdkproxy/requirements.txt @@ -1,17 +1,12 @@ -aws-cdk-lib==2.99.0 -boto3==1.34.119 -boto3-stubs==1.34.119 -botocore==1.34.119 +aws-cdk-lib==2.160.0 +boto3==1.35.26 +boto3-stubs==1.35.26 cdk-nag==2.7.2 -constructs==10.0.73 -starlette==0.36.3 -fastapi == 0.109.2 -Flask==2.3.2 +fastapi == 0.115.0 PyYAML==6.0 requests==2.32.2 tabulate==0.8.9 uvicorn==0.15.0 werkzeug==3.0.3 -constructs>=10.0.0,<11.0.0 git-remote-codecommit==1.16 aws-ddk-core==1.3.0 \ No newline at end of file diff --git a/backend/dataall/base/context.py b/backend/dataall/base/context.py index b271b3e18..3df892e18 100644 --- a/backend/dataall/base/context.py +++ b/backend/dataall/base/context.py @@ -4,7 +4,7 @@ that in the request scope The class uses Flask's approach to handle request: ThreadLocal -That approach should work fine for AWS Lambdas and local server that uses Flask app +That approach should work fine for AWS Lambdas and local server that uses FastApi app """ from dataclasses import dataclass diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt index 4067e0fd9..7351d1274 100644 --- a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt @@ -1,3 +1,2 @@ -aws-cdk-lib==2.103.1 -constructs>=10.0.0,<11.0.0 +aws-cdk-lib==2.160.0 aws-ddk-core==1.3.0 diff --git a/backend/local_graphql_server.py b/backend/local_graphql_server.py index 1ea96a732..40c765896 100644 --- a/backend/local_graphql_server.py +++ b/backend/local_graphql_server.py @@ -1,24 +1,23 @@ +import logging import os import jwt from ariadne import graphql_sync from ariadne.constants import PLAYGROUND_HTML -from flask import Flask, request, jsonify -from flask_cors import CORS +from fastapi import FastAPI, Request from graphql import parse +from starlette.middleware.cors import CORSMiddleware +from starlette.responses import JSONResponse, HTMLResponse from dataall.base.api import get_executable_schema -from dataall.core.tasks.service_handlers import Worker -from dataall.core.permissions.services.tenant_permissions import TENANT_ALL -from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService - -from dataall.base.db import get_engine, Base -from dataall.base.searchproxy import connect, run_query -from dataall.base.loader import load_modules, ImportMode from dataall.base.config import config from dataall.base.context import set_context, dispose_context, RequestContext - -import logging +from dataall.base.db import get_engine, Base +from dataall.base.loader import load_modules, ImportMode +from dataall.base.searchproxy import connect, run_query +from dataall.core.permissions.services.tenant_permissions import TENANT_ALL +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService +from dataall.core.tasks.service_handlers import Worker logger = logging.getLogger('graphql') logger.propagate = False @@ -45,10 +44,14 @@ def __init__(self, **kwargs): schema = get_executable_schema() -# app = GraphQL(schema, debug=True) - -app = Flask(__name__) -CORS(app) +app = FastAPI(debug=True) +app.add_middleware( + CORSMiddleware, + allow_origins=['*'], + allow_credentials=True, + allow_methods=['*'], + allow_headers=['*'], +) def request_context(headers, mock=False): @@ -87,67 +90,61 @@ def request_context(headers, mock=False): return context.__dict__ -@app.route('/graphql', methods=['OPTIONS']) +@app.options('/graphql') def opt(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return '

Hello

', 200 + return HTMLResponse('

Hello

') -@app.route('/esproxy', methods=['OPTIONS']) +@app.options('/esproxy') def esproxyopt(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return '

Hello

', 200 + return HTMLResponse('

Hello

') -@app.route('/graphql', methods=['GET']) +@app.get('/graphql') def graphql_playground(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return PLAYGROUND_HTML, 200 + return HTMLResponse(PLAYGROUND_HTML) -@app.route('/esproxy', methods=['POST']) -def esproxy(): - body = request.data.decode('utf-8') - print(body) +@app.post('/esproxy') +async def esproxy(request: Request): + body = (await request.body()).decode('utf-8') + logger.info('body %s', body) return run_query(es=es, index='dataall-index', body=body) -@app.route('/graphql', methods=['POST']) -def graphql_server(): - print('.............................') - # GraphQL queries are always sent as POST - logger.debug(request.data) - data = request.get_json() - print('*** Request ***', request.data) - logger.info(data) +@app.post('/graphql') +async def graphql_server(request: Request): + logger.info('.............................') + data = await request.json() + logger.info('Request payload %s', data) # Extract the GraphQL query string from the 'query' key in the data dictionary query_string = data.get('query') if not query_string: - return jsonify({'error': 'GraphQL query not provided'}), 400 + return JSONResponse({'error': 'GraphQL query not provided'}, 400) try: query = parse(query_string) except Exception as e: - return jsonify({'error': str(e)}), 400 + return JSONResponse({'error': str(e)}, 400) - print('***** Printing Query ****** \n\n') - print(query) + logger.info('Request query %s', query.to_dict()) context = request_context(request.headers, mock=True) logger.debug(context) - # Note: Passing the request to the context is optional. - # In Flask, the current request is always accessible as flask.request success, result = graphql_sync( schema, data, @@ -157,14 +154,4 @@ def graphql_server(): dispose_context() status_code = 200 if success else 400 - return jsonify(result), status_code - - -if __name__ == '__main__': - logger.info('Starting dataall flask local application') - app.run( - debug=True, # nosec - threaded=False, - host='0.0.0.0', - port=5000, - ) + return JSONResponse(result, status_code) diff --git a/backend/requirements.txt b/backend/requirements.txt index 94f7927ca..d50a72380 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,10 +1,7 @@ ariadne==0.17.0 aws-xray-sdk==2.4.3 -boto3==1.34.119 -botocore==1.34.119 -fastapi == 0.109.2 -Flask==3.0.3 -flask-cors==5.0.0 +boto3==1.35.26 +fastapi == 0.115.0 nanoid==2.0.0 opensearch-py==1.0.0 PyAthena==2.3.0 @@ -14,5 +11,4 @@ PyYAML==6.0 requests==2.32.2 requests_aws4auth==1.1.1 sqlalchemy==1.3.24 -starlette==0.36.3 alembic==1.13.1 \ No newline at end of file diff --git a/deploy/requirements.txt b/deploy/requirements.txt index 3ac23e4da..d91bc5199 100644 --- a/deploy/requirements.txt +++ b/deploy/requirements.txt @@ -1,6 +1,4 @@ -aws-cdk-lib==2.115.0 -boto3-stubs==1.20.20 -boto3==1.28.23 -botocore==1.31.23 +aws-cdk-lib==2.160.0 +boto3==1.35.26 +boto3-stubs==1.35.26 cdk-nag==2.7.2 -constructs>=10.0.0,<11.0.0 diff --git a/deploy/stacks/cdk_nag_exclusions.py b/deploy/stacks/cdk_nag_exclusions.py index a9948088b..e1c80e0d2 100644 --- a/deploy/stacks/cdk_nag_exclusions.py +++ b/deploy/stacks/cdk_nag_exclusions.py @@ -28,6 +28,10 @@ 'id': 'AwsSolutions-CB3', 'reason': 'Access to docker daemon is required to build docker images', }, + { + 'id': 'AwsSolutions-SMG4', + 'reason': 'Database is used for test purposes', + }, ] BACKEND_STACK_CDK_NAG_EXCLUSIONS = [ diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index 39e7bad38..add56e890 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -821,7 +821,7 @@ def set_cloudfront_stage(self, target_env): f'echo "external_id = {get_tooling_account_external_id(target_env["account"])}" >> ~/.aws/config', 'aws sts get-caller-identity --profile buildprofile', 'export AWS_PROFILE=buildprofile', - 'pip install boto3==1.34.35', + 'pip install boto3==1.35.26', 'pip install beautifulsoup4', 'python deploy/configs/frontend_config.py', 'export AWS_DEFAULT_REGION=us-east-1', @@ -893,7 +893,7 @@ def cw_rum_config_action(self, target_env): 'aws sts get-caller-identity --profile buildprofile', 'export AWS_PROFILE=buildprofile', 'pip install --upgrade pip', - 'pip install boto3==1.34.35', + 'pip install boto3==1.35.26', 'python deploy/configs/rum_config.py', ], role=self.expanded_codebuild_role.without_policy_updates(), @@ -962,7 +962,7 @@ def set_albfront_stage(self, target_env, repository_name): f'echo "external_id = {get_tooling_account_external_id(target_env["account"])}" >> ~/.aws/config', 'aws sts get-caller-identity --profile buildprofile', 'export AWS_PROFILE=buildprofile', - 'pip install boto3==1.34.35', + 'pip install boto3==1.35.26', 'pip install beautifulsoup4', 'python deploy/configs/frontend_config.py', 'unset AWS_PROFILE', diff --git a/docker-compose.yaml b/docker-compose.yaml index 6b10cfeac..2e9597772 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -37,7 +37,7 @@ services: dockerfile: docker/dev/Dockerfile args: CONTAINER_UID: ${UID} - entrypoint: /bin/bash -c "../build/wait-for-it.sh elasticsearch:9200 -t 30 && python3.9 local_graphql_server.py" + entrypoint: /bin/bash -c "../build/wait-for-it.sh elasticsearch:9200 -t 30 && uvicorn local_graphql_server:app --host 0.0.0.0 --port 5000 --reload" expose: - 5000 ports: diff --git a/tests/client.py b/tests/client.py index 5f5346f0a..908709acf 100644 --- a/tests/client.py +++ b/tests/client.py @@ -1,12 +1,16 @@ -import typing import json +import typing + from ariadne import graphql_sync from ariadne.constants import PLAYGROUND_HTML -from flask import Flask, request, jsonify, Response +from fastapi import FastAPI from munch import DefaultMunch +from starlette.requests import Request +from starlette.responses import JSONResponse, HTMLResponse + from dataall.base.api import get_executable_schema -from dataall.base.context import set_context, dispose_context, RequestContext from dataall.base.config import config +from dataall.base.context import set_context, dispose_context, RequestContext config.set_property('cdk_proxy_url', 'mock_url') @@ -22,40 +26,41 @@ def query( groups: typing.List[str] = ['-'], **variables, ): - response: Response = self.client.post( + if not isinstance(username, str): + username = username.username + response = self.client.post( '/graphql', json={'query': f""" {query} """, 'variables': variables}, headers={'groups': json.dumps(groups), 'username': username}, ) - return DefaultMunch.fromDict(response.get_json()) + return DefaultMunch.fromDict(json.loads(response.text)) def create_app(db): - app = Flask('tests') + app = FastAPI(debug=True) schema = get_executable_schema() - @app.route('/', methods=['OPTIONS']) + @app.options('/') def opt(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return '

Hello

', 200 + return HTMLResponse('

Hello

') - @app.route('/graphql', methods=['GET']) + @app.get('/graphql') def graphql_playground(): # On GET request serve GraphQL Playground # You don't need to provide Playground if you don't want to # but keep on mind this will not prohibit clients from # exploring your API using desktop GraphQL Playground app. - return PLAYGROUND_HTML, 200 + return HTMLResponse(PLAYGROUND_HTML) - @app.route('/graphql', methods=['POST']) - def graphql_server(): + @app.post('/graphql') + async def graphql_server(request: Request): # GraphQL queries are always sent as POST # Note: Passing the request to the context is optional. - # In Flask, the current request is always accessible as flask.request - data = request.get_json() + data = await request.json() username = request.headers.get('Username', 'anonym') user_id = request.headers.get('Username', 'anonym_id') @@ -72,6 +77,6 @@ def graphql_server(): dispose_context() status_code = 200 if success else 400 - return jsonify(result), status_code + return JSONResponse(result, status_code) return app diff --git a/tests/conftest.py b/tests/conftest.py index 44a12aa71..78c96a26e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,8 @@ from unittest.mock import MagicMock import pytest +from starlette.testclient import TestClient + from dataall.base.db import get_engine, create_schema_and_tables, Engine from dataall.base.loader import load_modules, ImportMode, list_loaded_modules from glob import glob @@ -72,7 +74,7 @@ def app(db): @pytest.fixture(scope='module') def client(app) -> ClientWrapper: - with app.test_client() as client: + with TestClient(app) as client: yield ClientWrapper(client) diff --git a/tests/requirements.txt b/tests/requirements.txt index 87c1432ef..85e3fb149 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -3,5 +3,6 @@ pytest==7.3.1 pytest-cov==3.0.0 pytest-mock==3.6.1 pytest-dependency==0.5.1 +httpx==0.27.2 werkzeug==3.0.3 assertpy==1.1.0 \ No newline at end of file diff --git a/tests_new/integration_tests/requirements.txt b/tests_new/integration_tests/requirements.txt index ced7a58ef..2225c80dc 100644 --- a/tests_new/integration_tests/requirements.txt +++ b/tests_new/integration_tests/requirements.txt @@ -1,5 +1,5 @@ assertpy==1.1.0 -boto3==1.26.95 +boto3==1.35.26 munch==2.5.0 pytest==7.3.1 pytest-cov==3.0.0