Skip to content

Commit

Permalink
Merge branch 'master' into image-push
Browse files Browse the repository at this point in the history
  • Loading branch information
MukuFlash03 authored Aug 14, 2024
2 parents 5eff0be + c6d1507 commit efccd5a
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 34 deletions.
54 changes: 30 additions & 24 deletions app_sidebar_collapsible.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from utils.datetime_utils import iso_to_date_only
from utils.db_utils import df_to_filtered_records, query_uuids, query_confirmed_trips, query_demographics
from utils.permissions import has_permission
from utils.permissions import has_permission, config
import flask_talisman as flt


Expand Down Expand Up @@ -138,6 +138,8 @@
className="sidebar",
)

# Subgroups configured for this deployment's opcodes — presumably a list of
# subgroup name strings, or None when the deployment config does not set one.
# TODO(review): confirm shape against the deployment config schema.
subgroups = config.get('opcode', {}).get('subgroups')
# Whether metrics should include "test" users by default; None/falsy means
# test users start out excluded (see the 'excluded-subgroups' default below).
include_test_users = config.get('metrics', {}).get('include_test_users')
# Global controls including date picker and timezone selector
def make_controls():
# according to docs, DatePickerRange will accept YYYY-MM-DD format
Expand Down Expand Up @@ -165,7 +167,7 @@ def make_controls():
'border-radius': '3px', 'margin-left': '3px'}
),
],
style={'display': 'flex'},
style={'display': 'flex', 'margin-left': 'auto'},
),
dbc.Collapse([
html.Div([
Expand All @@ -183,21 +185,24 @@ def make_controls():
style={'width': '180px'},
)]
),

dcc.Checklist(
id='global-filters',
options=[
{'label': 'Exclude "test" users',
'value': 'exclude-test-users'},
],
value=['exclude-test-users'],
style={'margin-top': '10px'},
),
],
id='collapse-filters',
is_open=False,
style={'padding': '5px 15px 10px', 'border': '1px solid #dbdbdb', 'border-top': '0'}
),
html.Div([
html.Span('Exclude subgroups:'),
dcc.Dropdown(
id='excluded-subgroups',
options=subgroups or ['test'],
value=[] if include_test_users else ['test'],
multi=True,
style={'flex': '1'},
),
],
style={'display': 'flex', 'gap': '5px',
'align-items': 'center', 'margin-top': '10px'}
),
],
style={'margin': '10px 10px 0 auto',
'width': 'fit-content',
Expand Down Expand Up @@ -226,7 +231,7 @@ def make_layout(): return html.Div([
dcc.Location(id='url', refresh=False),
dcc.Store(id='store-trips', data={}),
dcc.Store(id='store-uuids', data={}),
dcc.Store(id='store-excluded-uuids', data={}), # if 'test' users are excluded, a list of their uuids
dcc.Store(id='store-excluded-uuids', data={}), # list of UUIDs from excluded subgroups
dcc.Store(id='store-demographics', data={}),
dcc.Store(id='store-trajectories', data={}),
html.Div(id='page-content', children=make_home_page()),
Expand Down Expand Up @@ -254,21 +259,21 @@ def toggle_collapse_filters(n, is_open):
Input('date-picker', 'start_date'), # these are ISO strings
Input('date-picker', 'end_date'), # these are ISO strings
Input('date-picker-timezone', 'value'),
Input('global-filters', 'value'),
Input('excluded-subgroups', 'value'),
)
def update_store_uuids(start_date, end_date, timezone, filters):
def update_store_uuids(start_date, end_date, timezone, excluded_subgroups):
(start_date, end_date) = iso_to_date_only(start_date, end_date)
dff = query_uuids(start_date, end_date, timezone)
if dff.empty:
return {"data": [], "length": 0}, {"data": [], "length": 0}
# if 'exclude-testusers' filter is active,
# exclude any rows with user_token containing 'test', and
# output a list of those excluded UUIDs so other callbacks can exclude them too
if 'exclude-test-users' in filters:
excluded_uuids_list = dff[dff['user_token'].str.contains(
'test')]['user_id'].tolist()
else:
excluded_uuids_list = []

# if any subgroups are excluded, find UUIDs in those subgroups and output
# a list to store-excluded-uuids so that other callbacks can exclude them too
excluded_uuids_list = []
for subgroup in excluded_subgroups:
uuids_in_subgroup = dff[dff['user_token'].str.contains(f"_{subgroup}_")]['user_id'].tolist()
excluded_uuids_list.extend(uuids_in_subgroup)

records = df_to_filtered_records(dff, 'user_id', excluded_uuids_list)
store_uuids = {
"data": records,
Expand Down Expand Up @@ -310,12 +315,13 @@ def update_store_demographics(start_date, end_date, timezone, excluded_uuids):
)
def update_store_trips(start_date, end_date, timezone, excluded_uuids):
    """Refresh store-trips for the selected date window and timezone.

    Queries confirmed trips, drops rows belonging to the excluded UUIDs
    published by store-excluded-uuids, and returns a dict holding the
    surviving records, their count, and the user-input column names that
    query_confirmed_trips discovered (so downstream pages can keep them
    when filtering columns).
    """
    start_date, end_date = iso_to_date_only(start_date, end_date)
    trips_df, user_input_cols = query_confirmed_trips(start_date, end_date, timezone)
    kept_records = df_to_filtered_records(trips_df, 'user_id', excluded_uuids["data"])
    return {
        "data": kept_records,
        "length": len(kept_records),
        "userinputcols": user_input_cols,
    }

Expand Down
4 changes: 4 additions & 0 deletions pages/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,16 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
columns.update(
col['label'] for col in perm_utils.get_allowed_named_trip_columns()
)
columns.update(store_trips["userinputcols"])
has_perm = perm_utils.has_permission('data_trips')
df = pd.DataFrame(data)
if df.empty or not has_perm:
return None

logging.debug(f"Final list of retained cols {columns=}")
logging.debug(f"Before dropping, {df.columns=}")
df = df.drop(columns=[col for col in df.columns if col not in columns])
logging.debug(f"After dropping, {df.columns=}")
df = clean_location_data(df)

trips_table = populate_datatable(df,'trips-table')
Expand Down
10 changes: 6 additions & 4 deletions utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
]

MULTILABEL_NAMED_COLS = [
{'label': 'mode_confirm', 'path': 'data.user_input.mode_confirm'},
{'label': 'purpose_confirm', 'path': 'data.user_input.purpose_confirm'},
{'label': 'replaced_mode', 'path': 'data.user_input.replaced_mode'},
]

VALID_TRIP_COLS = [
Expand All @@ -24,13 +21,18 @@
"data.distance_meters",
"data.start_loc.coordinates",
"data.end_loc.coordinates",
"data.primary_sensed_mode",
"data.primary_predicted_mode",
"data.primary_ble_sensed_mode",
"user_id"
]

BINARY_TRIP_COLS = [
'user_id',
'data.start_place',
'data.end_place',
"cleaned_section_summary",
"inferred_section_summary",
]

valid_uuids_columns = [
Expand Down Expand Up @@ -91,4 +93,4 @@
'data.local_dt_second',
'data.local_dt_weekday',
'data.local_dt_timezone',
]
]
52 changes: 46 additions & 6 deletions utils/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,56 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str):
ts = esta.TimeSeries.get_aggregate_time_series()
# Note to self, allow end_ts to also be null in the timequery
# we can then remove the start_time, end_time logic
entries = ts.find_entries(
key_list=["analysis/confirmed_trip"],
df = ts.get_data_df("analysis/confirmed_trip",
time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts),
)
df = pd.json_normalize(list(entries))
user_input_cols = []

# logging.debug("Before filtering, df columns are %s" % df.columns)
logging.debug("Before filtering, df columns are %s" % df.columns)
if not df.empty:
columns = [col for col in perm_utils.get_all_trip_columns() if col in df.columns]
# Since we use `get_data_df` instead of `pd.json_normalize`,
# we lose the "data" prefix on the fields and they are only flattened one level
# Here, we restore the prefix for the VALID_TRIP_COLS from constants.py
# for backwards compatibility. We do this for all columns since columns which don't exist are ignored by the rename command.
rename_cols = constants.VALID_TRIP_COLS
# the mapping is `{distance: data.distance, duration: data.duration} etc
rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols))
logging.debug("Rename mapping is %s" % rename_mapping)
df.rename(columns=rename_mapping, inplace=True)
logging.debug("After renaming columns, they are %s" % df.columns)

# Now copy over the coordinates
df['data.start_loc.coordinates'] = df['start_loc'].apply(lambda g: g["coordinates"])
df['data.end_loc.coordinates'] = df['end_loc'].apply(lambda g: g["coordinates"])

# Add primary modes from the sensed, inferred and ble summaries. Note that we do this
# **before** filtering the `all_trip_columns` because the
# *_section_summary columns are not currently valid
get_max_mode_from_summary = lambda md: max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 else "INVALID"
df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply(get_max_mode_from_summary)
df["data.primary_predicted_mode"] = df.inferred_section_summary.apply(get_max_mode_from_summary)
if 'ble_sensed_summary' in df.columns:
df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply(get_max_mode_from_summary)
else:
logging.debug("No BLE support found, not fleet version, ignoring...")

# Expand the user inputs
user_input_df = pd.json_normalize(df.user_input)
df = pd.concat([df, user_input_df], axis='columns')
logging.debug(f"Before filtering {user_input_df.columns=}")
user_input_cols = [c for c in user_input_df.columns
if "metadata" not in c and
"xmlns" not in c and
"local_dt" not in c and
'xmlResponse' not in c and
"_id" not in c]
logging.debug(f"After filtering {user_input_cols=}")

combined_col_list = list(perm_utils.get_all_trip_columns()) + user_input_cols
logging.debug(f"Combined list {combined_col_list=}")
columns = [col for col in combined_col_list if col in df.columns]
df = df[columns]
logging.debug(f"After filtering against the combined list {df.columns=}")
# logging.debug("After getting all columns, they are %s" % df.columns)
for col in constants.BINARY_TRIP_COLS:
if col in df.columns:
Expand Down Expand Up @@ -110,7 +150,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str):
# logging.debug("After filtering, df columns are %s" % df.columns)
# logging.debug("After filtering, the actual data is %s" % df.head())
# logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str)
return df
return (df, user_input_cols)

def query_demographics():
# Returns a dictionary of dataframes where keys represent different survey ids and values are the df for each survey
Expand Down
5 changes: 5 additions & 0 deletions utils/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,18 @@ def get_all_trip_columns():

columns.update(get_required_columns())
# logging.debug("get_all_trip_columns: curr set is %s" % columns)
columns.update(permissions.get('additional_trip_columns', []))
logging.debug("get_all_trip_columns: after additional columns, curr set is %s" % columns)
return columns


def get_allowed_trip_columns():
    """Return the set of trip columns this deployment is allowed to show.

    Starts from the baseline VALID_TRIP_COLS, removes any columns listed in
    the 'data_trips_columns_exclude' permission, then adds any columns from
    'additional_trip_columns' (so an addition wins if a column appears in
    both lists).
    """
    excluded = set(permissions.get("data_trips_columns_exclude", []))
    additional = set(permissions.get("additional_trip_columns", []))
    columns = (set(constants.VALID_TRIP_COLS) - excluded) | additional
    logging.debug("allowed trip columns are %s" % columns)
    return columns


Expand Down

0 comments on commit efccd5a

Please sign in to comment.