-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
236 lines (194 loc) · 9.92 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
##----- Lribraries
from configparser import ConfigParser
from flask import Flask, request, render_template, redirect
import os
from functions.calculations import Calculations, enforce_float
from functions.utils import get_json_response, nonnumeric_rows, missing_data_rows, duplicate_rows
import pandas as pd
from functools import reduce
from flask import Flask, flash, url_for
import io
from waitress import serve
##----- App
# -- Setup env from config
# Read config.ini
config = ConfigParser()
config.read("config.ini")
from functions.utils import get_path_info, get_db_info
# flask secret key
SECRET_KEY = config["USER"]["SECRETKEY"]
# Paths
ROOT_DIR, TEMP_DIR, TAXON_DIR, DB_DIR, DS_DIR, DOWNLOAD_DIR = get_path_info(config)
# SQL connection information
SERVER, USERNAME, PASSWORD, DRIVER, DB_NAME = get_db_info(config)
ALLOWED_EXTENSIONS = {"csv"}
##-- Flask app
# Create instance of flask object
app = Flask(__name__)
# Route for root page
@app.route("/")
@app.route("/home", methods=["GET", "POST"])
def home():
"""
Home page for app.
"""
# config
config = ConfigParser()
config.read("config.ini")
DATE = config["USER"]["date"]
return render_template("/home.html", access_date=DATE)
@app.route("/upload", methods=["GET", "POST"])
def upload():
"""
Upload page.
"""
return render_template("/upload.html")
@app.route("/upload-display", methods=["GET", "POST"])
def upload_display():
"""
Display priority dataframe
and scoring/ scaling filters.
"""
if request.method == "POST":
##------ If user sumbits biobank csv
if "submit" in request.form:
# if file not given
if not request.files["file"]:
flash("No file uploaded")
return render_template("/upload.html")
file = request.files["file"]
file_ext = file.filename.split(".", 1)[1]
# check if file extension is permitted
if file_ext not in ALLOWED_EXTENSIONS:
flash("File extension not supported, only csv files are permitted.")
return render_template("/upload.html")
# read in content of upload file
contents = file.read()
string_io = io.StringIO(contents.decode('utf-8'))
raw_uploaded_data = pd.read_csv(string_io)
# get column names supplied by user
sample_column, species_column, class_column = (request.form.get(column) for column in ["sampleColumn", "speciesColumn", "classColumn"])
uploaded_data = raw_uploaded_data.rename(columns={sample_column: "biobank_samples", species_column: "full_name", class_column: "class"})
# error if column names arent present
if 'full_name' not in uploaded_data.columns or 'biobank_samples' not in uploaded_data.columns or 'class' not in uploaded_data.columns:
flash(f"Error: Column names do not match or there are missing columns from your data. Please check 'full_name', 'class' and 'biobank_samples' are in your data column names: {[col for col in raw_uploaded_data.columns]}, or that you have supplied your files column names, your previous enteries were: {species_column, class_column, sample_column}.")
return render_template("/upload.html")
# error if full_name or class is non-object type
if uploaded_data['full_name'].dtype != object or uploaded_data['class'].dtype != object:
flash(f"Error: 'full_name' or 'class' column is not string type. 'full_name' must contain strings of species names. 'class' must contain strings of species class. Please check you data and try again.")
return render_template("/upload.html")
# error if biobank_samples contain non-numeric data
nonnumeric_list = nonnumeric_rows(uploaded_data, 'biobank_samples')
if len(nonnumeric_list) > 0:
flash(f"Error: 'biobank_samples' column type is not numeric type: {nonnumeric_list}. 'biobank_samples' must be counts of samples for each species.")
return render_template("/upload.html")
# warn user of missing data in their upload
missing_indexs = missing_data_rows(uploaded_data)
if len(missing_indexs) > 0:
flash(f"Warning: There is missing data that will be dropped when the data is added to database, see indexes: {missing_indexs}")
# warn user if duplicate species enteries
dup_fullnames = duplicate_rows(uploaded_data, 'full_name')
if len (dup_fullnames) > 0:
flash(f"Warning: There are duplicate species in your data which could result in data being dropped: {dup_fullnames}")
# save uploaded file to /temp folder
uploaded_data.to_parquet(TEMP_DIR + "uploaded_data.parquet", engine="fastparquet", index=False)
##------ If user adds data to database
if "add" in request.form:
uploaded_data = pd.read_parquet(TEMP_DIR + "uploaded_data.parquet")
# save uploaded biobank data to /temp folder, replacing old biobank data
uploaded_data.to_parquet(TEMP_DIR + "biobank.parquet", engine="fastparquet", index=False)
return redirect(url_for('display')) # display data
# Find uploaded names that arent present in database
data = pd.read_parquet(TEMP_DIR + "scored_dataset.parquet")
isin_result = uploaded_data["full_name"].isin(data["full_name"])
unmatched = uploaded_data.loc[~isin_result, 'full_name'].tolist()
no_unmatched = len(unmatched)
table = [
uploaded_data.head(500).to_html(
classes="table bg-light shadow table-striped \ table-hover m-0 shadow sticky-top table-light"
)
]
# render HTML with home.html and return post requests
return render_template("/add.html", unmatched=unmatched, no_unmatched=no_unmatched, tables=table)
@app.route("/display", methods=["GET", "POST"])
def display():
"""
Display priority dataframe
and weigthing.
"""
# If biobank data not provided yet
if not os.path.exists(TEMP_DIR + "biobank.parquet"):
# Redirect to upload page
return redirect(url_for('upload'))
data = pd.read_parquet(TEMP_DIR + "scored_dataset.parquet")
biobank_data = pd.read_parquet(TEMP_DIR + "biobank.parquet")
# rescale biobank data
calc = Calculations(biobank_data["biobank_samples"])
biobank_data["biobank_samples"]= calc.invert_max()
# add biobank data to rest of dataset
data = reduce(
lambda left, right: pd.merge(
left, right, on=["full_name", "class"], how="outer"
),
[biobank_data[["full_name", "class", "biobank_samples"]], data],
).drop_duplicates()
# assign species with no biobank_samples a value of 1
data["biobank_samples"].fillna(1, inplace=True)
# enforce float type
cols = [col for col in data.columns if col not in ["full_name", "class"]]
for col in cols:
data[col] = enforce_float(data[col])
# DataTable fails is any NaN are present in dataframe, failsafe- drop any rows that contain one or more NaN values
data = data.dropna()
# currently combining conservation value here as approach might change in future
data["conservation_value"] = (
data["iucn_category"] + data["cites_listing"] + data["ed_median"]
) / 3
data = data.drop(["iucn_category", "cites_listing", "ed_median"], axis=1)
#----- Weight data
if request.method == "POST":
# function that applies numeric weight to column
def apply_weight(data, column_name, weight):
try:
weight = int(weight or 1) # convert to integer or default to 1
except ValueError: # catch non-integer enteries and raise warning
flash(f"Invalid weight value for {column_name}: '{weight}'. Please supply numeric entry only")
return render_template("/display.html")
data[column_name] *= weight
# conservation weighting
weight = request.form.get(key='conservation', default=1)
apply_weight(data, "conservation_value", weight)
# demand weighting
weight = request.form.get(key='demand', default=1)
apply_weight(data, "demand", weight)
# biobank samples weighting
weight = request.form.get(key='samples', default=1)
apply_weight(data, "biobank_samples", weight)
# create parquet of weighted data
data.to_parquet(TEMP_DIR + "weighted_dataset.parquet", engine="fastparquet", index=False)
# download data if download button clicked
if "download" in request.form:
data.to_csv(os.path.join(DOWNLOAD_DIR + "priority-scores.csv"))
flash(f"Dataset downloaded to '/downloaded' folder")
# get class information for filters
classes = list(data["class"].drop_duplicates())
classes = [x for x in classes if x is not None]
classes.sort()
# Send the JSON response to the DataTable via Ajax
return render_template("/display.html", classes=classes)
@app.route("/display-data", methods=["POST", "GET"])
def display_data():
data = pd.read_parquet(TEMP_DIR + "weighted_dataset.parquet")
data["priority_score"] = (data[['biobank_samples', 'demand','conservation_value']].sum(axis="columns")) / 3 # calculate priority scores
data = data.sort_values("priority_score", ascending=False) # sort by priority score
data = data[['full_name', 'class', 'biobank_samples', 'demand', 'conservation_value', 'priority_score', 'null_percent']] # order columns
data = data.round(2) # round data to 2 decimal place
result = get_json_response(json_data= request.get_json(), data = data)
return result
# run app
if __name__ == "__main__":
app.config["SESSION_TYPE"] = "filesystem"
app.config["SECRET_KEY"] = SECRET_KEY
# port = int(os.environ.get("PORT", 5000))
# app.run(debug=True, host="0.0.0.0", port=port, threaded=True)
serve(app, host='0.0.0.0', port=5000)