diff --git a/deployment/uwsgi/uwsgi.ini b/deployment/uwsgi/uwsgi.ini index a7658124..aa8f64dd 100644 --- a/deployment/uwsgi/uwsgi.ini +++ b/deployment/uwsgi/uwsgi.ini @@ -6,6 +6,7 @@ chmod-socket = 666 master = true processes = 2 harakiri-verbose = true +disable-logging = true harakiri = 45 http-timeout = 45 socket-timeout = 45 diff --git a/dev-requirements.txt b/dev-requirements.txt index 3009e3b3..3d6294ab 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -9,10 +9,7 @@ codacy-coverage moto==0.4.5 Sphinx==1.3.1 sphinxcontrib-httpdomain==1.3.0 --e git+https://git@github.com/uc-cdis/indexclient.git@1.0#egg=indexclient --e git+https://git@github.com/NCI-GDC/signpost.git@c8e2aa5ff572c808cba9b522b64f7b497e79c524#egg=signpost -e git+https://git@github.com/uc-cdis/cdisutils-test.git@0.0.1#egg=cdisutilstest --e git+https://git@github.com/uc-cdis/flask-postgres-session.git@0.1.1#egg=flask_postgres_session # dependency of sheepdog envelopes==0.4 -e git+https://git@github.com/uc-cdis/sheepdog.git@1.1.1#egg=sheepdog diff --git a/dockerrun.bash b/dockerrun.bash new file mode 100755 index 00000000..7c8a8fa1 --- /dev/null +++ b/dockerrun.bash @@ -0,0 +1,20 @@ +#!/bin/bash + +cd /var/www/peregrine + +export PYTHONUNBUFFERED=TRUE + +( + # Wait for nginx to create uwsgi.sock + let count=0 + while [[ (! -e uwsgi.sock) && count -lt 10 ]]; do + sleep 2 + let count="$count+1" + done + if [[ ! -e uwsgi.sock ]]; then + echo "WARNING: /var/www/peregrine/uwsgi.sock does not exist!!!" + fi + uwsgi --ini /etc/uwsgi/uwsgi.ini +) & + +nginx -g 'daemon off;' diff --git a/peregrine/api.py b/peregrine/api.py index 36313f47..a4d39cb4 100644 --- a/peregrine/api.py +++ b/peregrine/api.py @@ -12,6 +12,7 @@ import datamodelutils from dictionaryutils import DataDictionary, dictionary as dict_init from cdispyutils.log import get_handler +from indexclient.client import IndexClient from cdispyutils.uwsgi import setup_user_harakiri import peregrine @@ -66,6 +67,13 @@ def db_init(app): ) + app.logger.info('Initializing Indexd driver') + app.index_client = IndexClient( + app.config['SIGNPOST']['host'], + version=app.config['SIGNPOST']['version'], + auth=app.config['SIGNPOST']['auth']) + + # Set CORS options on app configuration def cors_init(app): accepted_headers = [ diff --git a/peregrine/dev_settings.example.py b/peregrine/dev_settings.example.py index 7bb4ae52..3ec43f54 100644 --- a/peregrine/dev_settings.example.py +++ b/peregrine/dev_settings.example.py @@ -4,7 +4,14 @@ # Auth AUTH = 'https://gdc-portal.nci.nih.gov/auth/keystone/v3/' -INTERNAL_AUTH = env.get('INTERNAL_AUTH', 'https://gdc-portal.nci.nih.gov/auth/') +INTERNAL_AUTH = env.get( + 'INTERNAL_AUTH', 'https://gdc-portal.nci.nih.gov/auth/') + +# Signpost +SIGNPOST = { + 'host': env.get('SIGNPOST_HOST', 'http://localhost:8888'), + 'version': 'v0', + 'auth': None} AUTH_ADMIN_CREDS = { 'domain_name': env.get('KEYSTONE_DOMAIN'), @@ -13,31 +20,18 @@ 'auth_url': env.get('KEYSTONE_AUTH_URL'), 'user_domain_name': env.get('KEYSTONE_DOMAIN')} -# Storage -CLEVERSAFE_HOST = env.get('CLEVERSAFE_HOST', 'cleversafe.service.consul') +STORAGE = { + "s3": + { + "access_key": '', + 'secret_key': '' + } +} -STORAGE = {"s3": { - "keys": { - "cleversafe.service.consul": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - "localhost": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - }, "kwargs": { - 'cleversafe.service.consul': { - 'host': 
'cleversafe.service.consul', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - 'localhost': { - 'host': 'localhost', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - }}} SUBMISSION = { - "bucket": 'test_submission', - "host": CLEVERSAFE_HOST, + "bucket": '' } + # Postgres PSQLGRAPH = { 'host': os.getenv("GDC_PG_HOST", "localhost"), diff --git a/peregrine/dev_settings.py b/peregrine/dev_settings.py index 68f5c95f..05b45ab1 100644 --- a/peregrine/dev_settings.py +++ b/peregrine/dev_settings.py @@ -2,15 +2,16 @@ from boto.s3.connection import OrdinaryCallingFormat from os import environ as env -# Signpost -SIGNPOST = { - 'host': env.get('SIGNPOST_HOST', 'http://localhost:8888'), - 'version': 'v0', - 'auth': None} - # Auth AUTH = 'https://gdc-portal.nci.nih.gov/auth/keystone/v3/' -INTERNAL_AUTH = env.get('INTERNAL_AUTH', 'https://gdc-portal.nci.nih.gov/auth/') +INTERNAL_AUTH = env.get( + 'INTERNAL_AUTH', 'https://gdc-portal.nci.nih.gov/auth/') + +# Signpost +SIGNPOST = { + 'host': env.get('SIGNPOST_HOST', 'http://localhost:8888'), + 'version': 'v0', + 'auth': None} AUTH_ADMIN_CREDS = { 'domain_name': env.get('KEYSTONE_DOMAIN'), @@ -22,53 +23,63 @@ # Storage CLEVERSAFE_HOST = env.get('CLEVERSAFE_HOST', 'cleversafe.service.consul') -STORAGE = {"s3": { - "keys": { - "cleversafe.service.consul": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - "localhost": { - "access_key": os.environ.get('CLEVERSAFE_ACCESS_KEY'), - 'secret_key': os.environ.get('CLEVERSAFE_SECRET_KEY')}, - }, "kwargs": { - 'cleversafe.service.consul': { - 'host': 'cleversafe.service.consul', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - 'localhost': { - 'host': 'localhost', - "is_secure": False, - "calling_format": OrdinaryCallingFormat()}, - }}} +STORAGE = { + "s3": + { + "access_key": '', + 'secret_key': '' + } +} + + SUBMISSION = { - "bucket": 'test_submission', - "host": CLEVERSAFE_HOST, + "bucket": 'test_submission' } # Postgres PSQLGRAPH = { 'host': os.getenv("GDC_PG_HOST", "localhost"), 'user': os.getenv("GDC_PG_USER", "test"), 'password': os.getenv("GDC_PG_PASSWORD", "test"), - 'database': os.getenv("GDC_PG_DBNAME", "automated_test") + 'database': os.getenv("GDC_PG_DBNAME", "sheepdog_automated_test") } # API server PEREGRINE_HOST = os.getenv("PEREGRINE_HOST", "localhost") -PEREGRINE_PORT = int(os.getenv("PEREGRINE_PORT", "5000")) +PEREGRINE_PORT = int(os.getenv("PEREGRINE_PORT", "5555")) # FLASK_SECRET_KEY should be set to a secure random string with an appropriate # length; 50 is reasonable. 
For the random generation to be secure, use
# ``random.SystemRandom()``
 FLASK_SECRET_KEY = 'eCKJOOw3uQBR5pVDz3WIvYk3RsjORYoPRdzSUNJIeUEkm1Uvtq'
 
-DICTIONARY_URL = os.environ.get('DICTIONARY_URL','https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json')
+DICTIONARY_URL = os.environ.get(
+    'DICTIONARY_URL', 'https://s3.amazonaws.com/dictionary-artifacts/datadictionary/develop/schema.json')
+
+OIDC_ISSUER = 'http://localhost/user'
 
 HMAC_ENCRYPTION_KEY = os.environ.get('CDIS_HMAC_ENCRYPTION_KEY', '')
 
+# OAUTH2 = {
+#     "client_id": os.environ.get('CDIS_PEREGRINE_CLIENT_ID'),
+#     "client_secret": os.environ.get("CDIS_PEREGRINE_CLIENT_SECRET"),
+#     "oauth_provider": os.environ.get("CDIS_USER_API_OAUTH", 'http://localhost:8000/oauth2/'),
+#     "redirect_uri": os.environ.get("CDIS_PEREGRINE_OAUTH_REDIRECT", 'localhost:5000/v0/oauth2/authorize'),
+#}
+
 OAUTH2 = {
-    "client_id": os.environ.get('CDIS_PEREGRINE_CLIENT_ID'),
-    "client_secret": os.environ.get("CDIS_PEREGRINE_CLIENT_SECRET"),
-    "oauth_provider": os.environ.get("CDIS_USER_API_OAUTH", 'http://localhost:8000/oauth2/'),
-    "redirect_uri": os.environ.get("CDIS_PEREGRINE_OAUTH_REDIRECT", 'localhost:5000/v0/oauth2/authorize'),
+    'client_id': '',
+    'client_secret': '',
+    'api_base_url': 'http://localhost/user/',
+    'authorize_url': 'http://localhost/user/oauth2/authorize',
+    'access_token_url': 'http://localhost/user/oauth2/token',
+    'refresh_token_url': 'http://localhost/user/oauth2/token',
+    'client_kwargs': {
+        'redirect_uri': 'http://localhost/api/v0/oauth2/authorize',
+        'scope': 'openid data user',
+    },
+    # deprecated key values, should be removed after all commons use new oidc
+    'internal_oauth_provider': 'http://localhost/oauth2/',
+    'oauth_provider': 'http://localhost/user/oauth2/',
+    'redirect_uri': 'http://localhost/api/v0/oauth2/authorize'
 }
 
 USER_API = "http://localhost:8000/"
diff --git a/peregrine/resources/submission/__init__.py b/peregrine/resources/submission/__init__.py
index b120bdc5..e5ec8c01 100644
--- a/peregrine/resources/submission/__init__.py
+++ b/peregrine/resources/submission/__init__.py
@@ -3,7 +3,14 @@
 :py:mod:``peregrine``.
 """
 
+import datetime
 import os
+import os.path
+
+import uuid
+import shutil
+from flask import Response, send_file, stream_with_context
+
 import json
 import time
 import fcntl
@@ -14,9 +21,11 @@
 from peregrine.auth import current_user, get_program_project_roles
 import peregrine.blueprints
+from peregrine.utils import jsonify_check_errors
 from peregrine.resources.submission import graphql
 
 
+
 def get_open_project_ids():
     """
     List project ids corresponding to projects with ``availability_type ==
@@ -118,10 +127,12 @@ def set_read_access_projects():
 @peregrine.blueprints.blueprint.route('/graphql', methods=['POST'])
 def root_graphql_query():
     """
-    Run a graphql query.
+    Run a graphql query and export to supported formats (json, bdbag).
+
     """
     # Short circuit if user is not recognized. Make sure that the list of
     # projects that the user has read access to is set.
+
     try:
         set_read_access_projects()
     except AuthZError:
@@ -129,12 +140,37 @@
         return data, 403
     payload = peregrine.utils.parse_request_json()
     query = payload.get('query')
+    export_format = payload.get('format')
     variables, errors = peregrine.utils.get_variables(payload)
     if errors:
         return flask.jsonify({'data': None, 'errors': errors}), 400
-    return peregrine.utils.jsonify_check_errors(
-        graphql.execute_query(query, variables)
-    )
+
+    return_data = jsonify_check_errors(graphql.execute_query(query, variables))
+    data, code = return_data
+
+    if code != 200:
+        return data, code
+
+    if export_format == 'bdbag':
+        res = peregrine.utils.flatten_json(json.loads(data.data), '', "-")
+
+        bag_info = {'organization': 'CDIS',
+                    'data_type': 'TOPMed',
+                    'date_created': datetime.date.today().isoformat()}
+        args = dict(
+            bag_info=bag_info,
+            payload=res)
+
+        bag = peregrine.utils.create_bdbag(**args)  # bag is a compressed file
+        key_name = str(flask.g.user.id) + "/" + \
+            str(uuid.uuid4()) + '_' + datetime.datetime.now().strftime('%s')
+        peregrine.utils.put_data_to_s3(bag, key_name)
+        url = peregrine.utils.generate_presigned_url(key_name)
+        shutil.rmtree(os.path.abspath(os.path.join(bag, os.pardir)))
+
+        return flask.Response(url), 200
+    else:
+        return return_data
 
 
 def generate_schema_file(graphql_schema, app_logger):
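For reference, a minimal sketch of how a client might exercise the new export path above. The endpoint path and the `format` key come from this diff and its tests; the host, port (the default from `dev_settings.py`), token, and query are illustrative:

    import requests

    payload = {
        'query': '{ project (project_id: "CGCI-BLGSP") { project_id code } }',
        'format': 'bdbag',  # omit this key (or send any other value) for the plain JSON response
    }
    # Hypothetical local deployment and auth header.
    resp = requests.post(
        'http://localhost:5555/v0/submission/graphql',
        json=payload,
        headers={'Authorization': 'Bearer <token>'})
    # On success the response body is a presigned S3 URL to the zipped BDBag.
    print(resp.status_code, resp.text)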
diff --git a/peregrine/utils/__init__.py b/peregrine/utils/__init__.py
index ced77ead..3bc6f18e 100644
--- a/peregrine/utils/__init__.py
+++ b/peregrine/utils/__init__.py
@@ -1,2 +1,6 @@
-from .payload import get_variables,jsonify_check_errors,parse_request_json
+from .payload import get_variables,jsonify_check_errors,parse_request_json,get_keys,contain_node_with_category
+from .pybdbag import create_bdbag
 from .scheduling import AsyncPool
+from .json2csv import flatten_obj, json2tsv, dicts2tsv, flatten_json
+from .response import format_response
+from .s3 import put_data_to_s3, generate_presigned_url
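The flattening helpers added to `json2csv.py` below follow the convention exercised by `test_json2tsv` in `tests/graphql/test_graphql.py`: nested keys are joined with the delimiter into one flat column name per leaf. A small worked example (data abbreviated from that test):

    from peregrine.utils import json2tsv

    data = {'project': [{'code': 'BLGSP',
                         'programs': [{'name': 'DEV'}]}]}
    rows = json2tsv(data, '', '_')
    # One flat row per combination of nested records:
    # [{'_project_code': 'BLGSP', '_project_programs_name': 'DEV'}]
    print(rows)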
diff --git a/peregrine/utils/json2csv.py b/peregrine/utils/json2csv.py
index 0d8ae499..78d70c41 100644
--- a/peregrine/utils/json2csv.py
+++ b/peregrine/utils/json2csv.py
@@ -47,3 +47,105 @@ def to_csv(hits, dialect='excel'):
         writer.writerows(rows)
 
     return s.getvalue()
+
+
+def dicts2tsv(dict_list):
+    """
+    Convert a list of dictionaries to TSV format.
+    Each element of the list represents one row of the TSV.
+
+    Args:
+        dict_list(list): list of dictionaries
+
+    Returns:
+        output(str): string in TSV format
+    """
+
+    tsv = ""
+
+    header_set = set()
+
+    for dict_row in dict_list:
+        for key in dict_row.keys():
+            if (dict_row[key] is not None and dict_row[key] != []):
+                header_set.update([key])
+
+    for h in header_set:
+        words = h.split('-')
+        tsv = tsv + "{}\t".format(words[-1])
+    tsv = tsv[:-1] + "\n"
+
+    nrow = 0
+    for dict_row in dict_list:
+        for h in header_set:
+            if dict_row.get(h):
+                tsv = tsv + "{}\t".format(dict_row[h])
+            else:
+                tsv = tsv + "None\t"
+        tsv = tsv[:-1] + "\n"
+        nrow = nrow + 1
+        if nrow >= 1000:
+            break
+    return tsv
+
+
+def join(tsv_list, L, index, row):
+    '''
+    Join a list of sub-TSVs to generate one big TSV.
+
+    Args:
+        tsv_list(list): list of tables or TSVs; each element is represented by a list of dictionaries
+        L(list): joined table that is iteratively updated
+        index(int): the index of the table to be joined
+        row(dict): the current joining row
+
+    Returns: None
+    '''
+    if index == len(tsv_list):
+        L.append(row)
+    else:
+        for item in tsv_list[index]:
+            newrow = row.copy()
+            newrow.update(item)
+            join(tsv_list, L, index + 1, newrow)
+
+
+def json2tsv(json, prefix, delem):
+    '''
+    Convert a JSON object to TSV format.
+
+    Args:
+        json(json): graphQL output JSON
+        prefix(str): prefix string
+        delem(char): delimiter, e.g. '\t'
+
+    Returns:
+        list of dictionaries representing a TSV file. Each item in the list represents one row of data;
+        each row is a dictionary keyed by column name.
+    '''
+
+    L = []
+    if isinstance(json, list) and json != []:
+        for l in json:
+            L += (json2tsv(l, prefix, delem))
+        return L
+    if isinstance(json, dict):
+        # handle dictionary
+        tsv_list = []
+        for k in json.keys():
+            tsv = json2tsv(json[k], prefix + delem + k, delem)
+            tsv_list.append(tsv)
+
+        join(tsv_list, L, 0, {})
+    else:
+        L.append({prefix: json})
+    return L
+
+
+def flatten_json(json, prefix, delem):
+    data = json['data']
+    res = {}
+    for key, val in data.iteritems():
+        res[key] = json2tsv({key: val}, prefix, delem)
+
+    return res
diff --git a/peregrine/utils/payload.py b/peregrine/utils/payload.py
index 39cfad41..44858a90 100644
--- a/peregrine/utils/payload.py
+++ b/peregrine/utils/payload.py
@@ -28,6 +28,8 @@
     SUCCESS_STATE,
 )
 
+from peregrine.resources.submission.graphql.node import get_fields
+
 
 def get_external_proxies():
     """Get any custom proxies set in the config.
@@ -215,6 +217,7 @@ def get_introspection_query():
     f = open(os.path.join(cur_dir, 'graphql', 'introspection_query.txt'), 'r')
     return f.read()
 
+
 def json_dumps_formatted(data):
     """Return json string with standard format."""
     dump = json.dumps(
@@ -222,6 +225,7 @@
     )
     return dump.encode('utf-8')
 
+
 def jsonify_check_errors(data_and_errors, error_code=400):
     """
     TODO
@@ -245,3 +249,42 @@ def get_variables(payload):
     except Exception as e:
         errors = ['Unable to parse variables', str(e)]
     return variables, errors
+
+
+def contain_node_with_category(json, category):
+    '''
+    Check whether the JSON object contains any node of the given category.
+
+    Args:
+        json: JSON object
+        category: node category to look for (e.g. 'data_file')
+    Returns:
+        True: if the JSON object contains a key mapping to a node of `category`
+        False: otherwise
+    '''
+    keys_list = []
+    get_keys(json, keys_list)
+    ns_field = get_fields()
+
+    category_map = {}
+    for (k, v) in ns_field.iteritems():
+        category_map[v] = k._dictionary['category']
+
+    for key in keys_list:
+        try:
+            if category_map[key] == category:
+                return True
+        except KeyError:
+            pass
+    return False
+
+
+def get_keys(payload, keys_list):
+    '''
+    Recursively collect all keys of a JSON object into keys_list.
+    '''
+    if isinstance(payload, dict):
+        keys_list += payload.keys()
+        map(lambda x: get_keys(x, keys_list), payload.values())
+    elif isinstance(payload, list):
+        map(lambda x: get_keys(x, keys_list), payload)
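`get_keys` collects keys at every nesting level, which is what lets `contain_node_with_category` match a GraphQL result against node categories from the dictionary. A toy illustration (the payload is hypothetical):

    from peregrine.utils import get_keys

    keys_list = []
    get_keys({'case': [{'samples': [{'sample_id': 's1'}]}]}, keys_list)
    # keys_list now holds every key seen at any depth:
    # ['case', 'samples', 'sample_id']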
diff --git a/peregrine/utils/pybdbag.py b/peregrine/utils/pybdbag.py
new file mode 100644
index 00000000..57ede2ec
--- /dev/null
+++ b/peregrine/utils/pybdbag.py
@@ -0,0 +1,143 @@
+import os
+import re
+import bagit
+import csv
+import zipfile
+import tempfile
+import shutil
+import uuid
+
+from flask import current_app
+
+
+from peregrine.resources.submission.graphql.node import get_fields
+
+
+def get_node_set(nodetype):
+    ns_field = get_fields()
+    data_files = set()
+    for (k, v) in ns_field.iteritems():
+        if k._dictionary['category'] == nodetype:
+            data_files.update([str(v)])
+    return data_files
+
+
+def is_category(node_name, data_files):
+    for item in data_files:
+        if node_name.find(item) >= 0:
+            return True
+    return False
+
+
+def is_uuid(identifier):
+    if identifier.startswith('dg.'):
+        return True
+    pattern = re.compile(
+        "^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$")
+    if pattern.match(identifier):
+        return True
+    return False
+
+def trim_uri(uri):
+    return uri.replace("s3://", "").replace("gs://", "").replace("http://", "").replace("https://", "")
+
+def create_bdbag(bag_info, payload, max_row=1000):
+    """Adapted from https://github.com/BD2KGenomics/dcc-dashboard-service/blob/feature/manifest-handover/webservice.py
+    Create a compressed BDBag file.
+    Args:
+        bag_info: bdbag info
+        payload(json): result of a graphql query
+        max_row(int): maximum number of rows per tsv file
+    Returns:
+        the path of the BDBag zip file
+    """
+
+    if len(payload) == 0:
+        return
+    data_files = get_node_set('data_file')
+    tmp_dir = tempfile.mkdtemp()
+    bag_path = tmp_dir + '/manifest_bag'
+    os.makedirs(bag_path)
+    bag = bagit.make_bag(bag_path, bag_info)
+
+    data_file_uuids = set()
+
+    for node_name, json_data in payload.iteritems():
+        header_set = set()
+        data_file_headers = set()
+        for dict_row in json_data:
+            for key in dict_row.keys():
+                if (dict_row[key] is not None and dict_row[key] != []):
+                    header_set.update([key])
+                    words = key.split('-')
+                    if len(words) > 1 and is_category(words[-2], data_files):
+                        data_file_headers.update([key])
+
+        for dict_row in json_data:
+            for h in data_file_headers:
+                if dict_row.get(h) and is_uuid(dict_row[h]):
+                    data_file_uuids.update([dict_row[h]])
+
+        header_set = sorted(header_set)
+
+        with open(bag_path + '/data/' + node_name + '.tsv', 'w') as tsvfile:
+            writer = csv.writer(tsvfile, delimiter='\t')
+            row = []
+            for h in header_set:
+                words = h.split('-')
+                header = words[-1]
+                row = row + [header]
+                if header[0] == '_':
+                    unique_id_header = 'entity:' + header[1:]
+                    row.insert(0, unique_id_header)
+            writer.writerow(row)
+
+            nrow = 0
+            for dict_row in json_data:
+                row = [str(uuid.uuid4())]  # unique id
+                add_row = True
+                for h in header_set:
+                    if dict_row.get(h):
+                        value = dict_row[h]
+                        if 'file_dos' in h:
+                            value = 'dos://' + value
+                        row = row + [value]
+                    elif 'file' in h:
+                        # ignoring missing file rows
+                        add_row = False
+                        break
+                    else:
+                        row = row + ["None"]
+                if add_row:
+                    nrow = nrow + 1
+                    writer.writerow(row)
+                if nrow >= max_row:
+                    break
+
+    with open(bag_path + '/fetch.txt', 'w') as fetch_file:
+        for item in data_file_uuids:
+            document = current_app.index_client.get(item)
+            if document:
+                fetch_file.write(
+                    'dos://' + item + '\t' + str(document.size) + '\tdata/' + str(trim_uri(document.urls[0])) + '\n')
+            else:
+                fetch_file.write(item + '\n')
+
+    bag.save(manifests=True)  # creates checksum manifests
+    # Compress bag.
+    zip_dir = bag_path
+    zip_file_name = tmp_dir + '/manifest_bag.zip'
+    zipf = zipfile.ZipFile(zip_file_name, 'w', zipfile.ZIP_DEFLATED)
+    zipdir(zip_dir, zipf)
+    zipf.close()
+    shutil.rmtree(zip_dir)
+    return zip_file_name
+
+
+def zipdir(path, ziph):
+    length = len(path)
+    # ziph is zipfile handle
+    for root, _, files in os.walk(path):
+        folder = root[length:]  # path without "parent"
+        for file in files:
+            ziph.write(os.path.join(root, file), os.path.join(folder, file))
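A note on the resulting bag layout: `create_bdbag` writes one TSV per node type under `data/`, and indexable files are referenced remotely via `fetch.txt` (standard BagIt `URL LENGTH FILENAME` lines) rather than copied into the payload. Given an indexd record resolved through `index_client.get`, a line would look roughly like this (UUID, size, and URL are illustrative):

    dos://2e8c...91af	1048576	data/my-bucket/sample.bam

Identifiers that indexd cannot resolve are written as a bare ID on its own line.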
diff --git a/peregrine/utils/response.py b/peregrine/utils/response.py
index ad468274..169a344e 100644
--- a/peregrine/utils/response.py
+++ b/peregrine/utils/response.py
@@ -8,10 +8,9 @@
 import os
 from cdispyutils.log import get_handler
 from flask import Response, Markup
-from peregrine import VERSION
 from peregrine.utils.json2csv import to_csv
 
-defusedxml.defuse_stdlib()
+#defusedxml.defuse_stdlib()
 logger = logging.getLogger("peregrine.utils.response")
 logger.addHandler(get_handler())
@@ -30,13 +29,6 @@
 def get_data_release():
     return 'Data Release 3.0 - September 21, 2016'
 
 
-def get_status():
-    status = {'status': 'OK', 'version': 1, 'tag': VERSION, 'data_release': get_data_release()}
-    if COMMIT:
-        status["commit"] = COMMIT
-    return status
-
-
 def tryToInt(value):
     new_value = value
     try:
@@ -152,8 +144,8 @@
     data = to_json(request_options, data)
 
     response = Response(data, mimetype=mimetype)
-    for key, value in get_status().iteritems():
-        response.headers.extend({'X-GDC-{}'.format(key): value})
+    # for key, value in get_status().iteritems():
+    #     response.headers.extend({'X-GDC-{}'.format(key): value})
 
     return response
diff --git a/peregrine/utils/s3.py b/peregrine/utils/s3.py
new file mode 100644
index 00000000..87b606f4
--- /dev/null
+++ b/peregrine/utils/s3.py
@@ -0,0 +1,42 @@
+import boto3
+import flask
+
+UPLOAD_SUCCESS = True
+UPLOAD_FAIL = False
+
+def put_data_to_s3(filename, key_name):
+    bucket_name = flask.current_app.config['SUBMISSION']['bucket']
+
+    config = flask.current_app.config["STORAGE"]["s3"]
+
+    try:
+        s3 = boto3.resource(
+            's3',
+            aws_access_key_id=config["access_key"],
+            aws_secret_access_key=config["secret_key"])
+        with open(filename, 'rb') as data:
+            s3.Bucket(bucket_name).put_object(Key=key_name, Body=data)
+        return UPLOAD_SUCCESS
+    except Exception:
+        return UPLOAD_FAIL
+
+
+def generate_presigned_url(keyname):
+    config = flask.current_app.config["STORAGE"]["s3"]
+    bucket_name = flask.current_app.config['SUBMISSION']['bucket']
+
+    client = boto3.client(
+        's3',
+        aws_access_key_id=config["access_key"],
+        aws_secret_access_key=config["secret_key"])
+
+    url = client.generate_presigned_url(
+        ClientMethod='get_object',
+        Params={
+            'Bucket': bucket_name,
+            'Key': keyname,
+            'ResponseContentDisposition': 'attachment; filename=manifest_bag.zip',
+            'ResponseContentType': 'application/zip'
+        }
+    )
+    return url
diff --git a/requirements.txt b/requirements.txt
index 7ef95060..90986263 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+bdbag==1.2.3
+boto3==1.7.8
 defusedxml==0.5.0
 scipy==0.18.1
 SurvivalPy==1.0.2
@@ -42,4 +44,6 @@
 cdispyutils==0.2.12
-e 
git+https://git@github.com/uc-cdis/graphql-core.git@cdis2.0.0#egg=graphql-core -e git+https://git@github.com/uc-cdis/cdiserrors.git@0.1.1#egg=cdiserrors -e git+https://git@github.com/uc-cdis/cdislogging.git@master#egg=cdislogging +-e git+https://git@github.com/uc-cdis/indexclient.git@1.0#egg=indexclient +-e git+https://git@github.com/NCI-GDC/signpost.git@c8e2aa5ff572c808cba9b522b64f7b497e79c524#egg=signpost -e git+https://git@github.com/uc-cdis/authutils.git@3.0.1#egg=authutils diff --git a/tests/graphql/test_graphql.py b/tests/graphql/test_graphql.py index dfae9ff7..1f68ebef 100644 --- a/tests/graphql/test_graphql.py +++ b/tests/graphql/test_graphql.py @@ -1,5 +1,6 @@ import json import os +import shutil import random import pytest @@ -8,19 +9,32 @@ from psqlgraph import Node from peregrine import dictionary +import peregrine from tests.graphql import utils from tests.graphql.utils import data_fnames +from peregrine.utils import json2tsv + +# Python 2 and 3 compatible +try: + from unittest.mock import MagicMock + from unittest.mock import patch +except ImportError: + from mock import MagicMock + from mock import patch + BLGSP_PATH = '/v0/submission/CGCI/BLGSP/' BRCA_PATH = '/v0/submission/TCGA/BRCA/' DATA_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') path = '/v0/submission/graphql' +#export_path = '/v0/submission/export' # ====================================================================== # Fixtures + @pytest.fixture def graphql_client(client, submitter): def execute(query, variables={}): @@ -82,6 +96,7 @@ def failed_upload_transaction(client, submitter, pg_driver_clean): # ====================================================================== # Tests + def post_example_entities_together( client, pg_driver_clean, submitter, data_fnames=data_fnames): path = BLGSP_PATH @@ -100,6 +115,7 @@ def put_example_entities_together(client, pg_driver_clean, submitter): data.append(json.loads(f.read())) return client.put(path, headers=submitter, data=json.dumps(data)) + def put_cgci(client, auth=None): path = '/v0/submission' data = json.dumps({ @@ -109,6 +125,7 @@ def put_cgci(client, auth=None): r = client.put(path, headers=auth, data=data) return r + def put_cgci_blgsp(client, auth=None): put_cgci(client, auth=auth) path = '/v0/submission/CGCI/' @@ -173,6 +190,7 @@ def test_unathenticated_graphql_query( assert r.status_code == 401, r.data + def test_fragment(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) r = client.post(path, headers=submitter, data=json.dumps({ @@ -693,7 +711,7 @@ def test_transaction_logs(client, submitter, pg_driver_clean, cgci_blgsp): assert r.json == { "data": { "transaction_log": [{ - 'project_id': 'CGCI-BLGSP', 'submitter': None + 'project_id': 'CGCI-BLGSP', 'submitter': None }] } } @@ -716,7 +734,7 @@ def test_with_path_to(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter) with pg_driver_clean.session_scope(): case_sub_id = pg_driver_clean.nodes(models.Case).path('samples')\ - .first().submitter_id + .first().submitter_id r = client.post(path, headers=submitter, data=json.dumps({ 'query': """ query Test {{ @@ -969,6 +987,7 @@ def test_catch_language_error(client, submitter, pg_driver_clean, cgci_blgsp): )] } + @pytest.mark.skip(reason='must rewrite query') def test_filter_empty_prop_list( client, submitter, pg_driver_clean, cgci_blgsp, monkeypatch): @@ -1110,8 +1129,6 @@ def test_read_group_with_path_to_case( } - 
- def test_tx_logs_async_fields(pg_driver_clean, graphql_client, cgci_blgsp): assert graphql_client("""{ tx_log: transaction_log { @@ -1183,6 +1200,7 @@ def test_tx_logs_committable(pg_driver_clean, graphql_client, cgci_blgsp, mock_t } } + @pytest.mark.skip(reason='we have different data') def test_tx_logs_deletion(pg_driver_clean, graphql_client, cgci_blgsp, failed_deletion_transaction): response = graphql_client("""{ @@ -1305,6 +1323,114 @@ def test_tx_log_comprehensive_query_failed_deletion( assert 'errors' not in response.json, response.data +def test_json2tsv(): + + data = {"project": [ + { + "code": "BLGSP", + "experiments": [], + "id": "daa208a7-f57a-562c-a04a-7a7c77542c98", + "name": "Burkitt Lymphoma Genome Sequencing Project", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] + }] + } + + res = json2tsv(data, '', '_') + + assert len(res) == 1 + assert res[0]['_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' + assert res[0]['_project_programs_name'] == 'DEV' + assert res[0]['_project_id'] == 'daa208a7-f57a-562c-a04a-7a7c77542c98' + assert res[0]['_project_code'] == 'BLGSP' + assert res[0]['_project_name'] == 'Burkitt Lymphoma Genome Sequencing Project' + + +def test_json2tsv_multiple_branches(client, submitter, pg_driver_clean): + data = {"data": { + "project": [ + { + "code": "BLGSP", + "experiments": [], + "id": "daa208a7-f57a-562c-a04a-7a7c77542c98", + "name": "Burkitt Lymphoma Genome Sequencing Project", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] + }, + { + "code": "test", + "experiments": [ + { + "id": "8307c663-af58-4b01-8fd0-9b63f55dac10" + }, + { + "id": "f6e00607-7f38-49ea-b64b-c45ccf0ff990" + } + ], + "id": "a77f549b-c74b-563e-80bb-570b5a4dde88", + "name": "test", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] + }, + { + "code": "open", + "experiments": [], + "id": "9a2fe4bf-5484-5fe4-b882-0d61ecade7cc", + "name": "Open access Project", + "programs": [ + { + "id": "f6bd2676-33f6-5671-ac2f-38aa1ceedcd8", + "name": "DEV" + } + ] + } + ] + } + } + res = json2tsv(data, '', '_') + + assert len(res) == 4 + assert res[0]['_data_project_programs_name'] == 'DEV' + assert res[0]['_data_project_id'] == 'daa208a7-f57a-562c-a04a-7a7c77542c98' + assert res[0]['_data_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' + assert res[1]['_data_project_programs_id'] == 'f6bd2676-33f6-5671-ac2f-38aa1ceedcd8' + assert res[1]['_data_project_name'] == 'test' + +@patch('peregrine.utils.s3.put_data_to_s3') +@patch('peregrine.utils.s3.generate_presigned_url') +def test_bagit_endpoint( + generate_presigned_url, put_data_to_s3, + client, submitter, monkeypatch): + data = json.dumps({ + 'format': 'bdbag', + 'path': 'manifest_bag', + 'query': """ + { + valid: project (project_id: "CGCI-BLGSP") { ...f } + invalid: project (project_id: "TCGA-TEST") { ...f } + multiple: project (project_id: ["TCGA-BRCA", "CGCI-BLGSP"]) { ...f } + } + fragment f on project { project_id code } + """ + }) + put_data_to_s3.return_value = True + generate_presigned_url.return_value = 'http://presignedurl.test' + res = client.post(path, headers=submitter, data=data) + assert res.status_code == 200 + assert res.data + def test_nodetype_interface(client, submitter, pg_driver_clean, cgci_blgsp): post_example_entities_together(client, pg_driver_clean, submitter)