Skip to content

Commit

Permalink
Merge branch 'master' into image-push
Browse files Browse the repository at this point in the history
  • Loading branch information
MukuFlash03 authored Aug 14, 2024
2 parents 5eff0be + c6d1507 commit efccd5a
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 34 deletions.
54 changes: 30 additions & 24 deletions app_sidebar_collapsible.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from utils.datetime_utils import iso_to_date_only
from utils.db_utils import df_to_filtered_records, query_uuids, query_confirmed_trips, query_demographics
from utils.permissions import has_permission
from utils.permissions import has_permission, config
import flask_talisman as flt


Expand Down Expand Up @@ -138,6 +138,8 @@
className="sidebar",
)

# Subgroups configured for this deployment's opcodes — presumably a list of
# subgroup name strings, or None when the deployment config does not set one.
# TODO(review): confirm shape against the deployment config schema.
subgroups = config.get('opcode', {}).get('subgroups')
# Whether metrics should include "test" users by default; None/falsy means
# test users start out excluded (see the 'excluded-subgroups' default below).
include_test_users = config.get('metrics', {}).get('include_test_users')
# Global controls including date picker and timezone selector
def make_controls():
# according to docs, DatePickerRange will accept YYYY-MM-DD format
Expand Down Expand Up @@ -165,7 +167,7 @@ def make_controls():
'border-radius': '3px', 'margin-left': '3px'}
),
],
style={'display': 'flex'},
style={'display': 'flex', 'margin-left': 'auto'},
),
dbc.Collapse([
html.Div([
Expand All @@ -183,21 +185,24 @@ def make_controls():
style={'width': '180px'},
)]
),

dcc.Checklist(
id='global-filters',
options=[
{'label': 'Exclude "test" users',
'value': 'exclude-test-users'},
],
value=['exclude-test-users'],
style={'margin-top': '10px'},
),
],
id='collapse-filters',
is_open=False,
style={'padding': '5px 15px 10px', 'border': '1px solid #dbdbdb', 'border-top': '0'}
),
html.Div([
html.Span('Exclude subgroups:'),
dcc.Dropdown(
id='excluded-subgroups',
options=subgroups or ['test'],
value=[] if include_test_users else ['test'],
multi=True,
style={'flex': '1'},
),
],
style={'display': 'flex', 'gap': '5px',
'align-items': 'center', 'margin-top': '10px'}
),
],
style={'margin': '10px 10px 0 auto',
'width': 'fit-content',
Expand Down Expand Up @@ -226,7 +231,7 @@ def make_layout(): return html.Div([
dcc.Location(id='url', refresh=False),
dcc.Store(id='store-trips', data={}),
dcc.Store(id='store-uuids', data={}),
dcc.Store(id='store-excluded-uuids', data={}), # if 'test' users are excluded, a list of their uuids
dcc.Store(id='store-excluded-uuids', data={}), # list of UUIDs from excluded subgroups
dcc.Store(id='store-demographics', data={}),
dcc.Store(id='store-trajectories', data={}),
html.Div(id='page-content', children=make_home_page()),
Expand Down Expand Up @@ -254,21 +259,21 @@ def toggle_collapse_filters(n, is_open):
Input('date-picker', 'start_date'), # these are ISO strings
Input('date-picker', 'end_date'), # these are ISO strings
Input('date-picker-timezone', 'value'),
Input('global-filters', 'value'),
Input('excluded-subgroups', 'value'),
)
def update_store_uuids(start_date, end_date, timezone, filters):
def update_store_uuids(start_date, end_date, timezone, excluded_subgroups):
(start_date, end_date) = iso_to_date_only(start_date, end_date)
dff = query_uuids(start_date, end_date, timezone)
if dff.empty:
return {"data": [], "length": 0}, {"data": [], "length": 0}
# if 'exclude-testusers' filter is active,
# exclude any rows with user_token containing 'test', and
# output a list of those excluded UUIDs so other callbacks can exclude them too
if 'exclude-test-users' in filters:
excluded_uuids_list = dff[dff['user_token'].str.contains(
'test')]['user_id'].tolist()
else:
excluded_uuids_list = []

# if any subgroups are excluded, find UUIDs in those subgroups and output
# a list to store-excluded-uuids so that other callbacks can exclude them too
excluded_uuids_list = []
for subgroup in excluded_subgroups:
uuids_in_subgroup = dff[dff['user_token'].str.contains(f"_{subgroup}_")]['user_id'].tolist()
excluded_uuids_list.extend(uuids_in_subgroup)

records = df_to_filtered_records(dff, 'user_id', excluded_uuids_list)
store_uuids = {
"data": records,
Expand Down Expand Up @@ -310,12 +315,13 @@ def update_store_demographics(start_date, end_date, timezone, excluded_uuids):
)
def update_store_trips(start_date, end_date, timezone, excluded_uuids):
    """Refresh store-trips for the selected date window and timezone.

    Queries confirmed trips, drops rows belonging to the excluded UUIDs
    published by store-excluded-uuids, and returns a dict holding the
    surviving records, their count, and the user-input column names that
    query_confirmed_trips discovered (so downstream pages can keep them
    when filtering columns).
    """
    start_date, end_date = iso_to_date_only(start_date, end_date)
    trips_df, user_input_cols = query_confirmed_trips(start_date, end_date, timezone)
    kept_records = df_to_filtered_records(trips_df, 'user_id', excluded_uuids["data"])
    return {
        "data": kept_records,
        "length": len(kept_records),
        "userinputcols": user_input_cols,
    }

Expand Down
4 changes: 4 additions & 0 deletions pages/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,16 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de
columns.update(
col['label'] for col in perm_utils.get_allowed_named_trip_columns()
)
columns.update(store_trips["userinputcols"])
has_perm = perm_utils.has_permission('data_trips')
df = pd.DataFrame(data)
if df.empty or not has_perm:
return None

logging.debug(f"Final list of retained cols {columns=}")
logging.debug(f"Before dropping, {df.columns=}")
df = df.drop(columns=[col for col in df.columns if col not in columns])
logging.debug(f"After dropping, {df.columns=}")
df = clean_location_data(df)

trips_table = populate_datatable(df,'trips-table')
Expand Down
10 changes: 6 additions & 4 deletions utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
]

MULTILABEL_NAMED_COLS = [
{'label': 'mode_confirm', 'path': 'data.user_input.mode_confirm'},
{'label': 'purpose_confirm', 'path': 'data.user_input.purpose_confirm'},
{'label': 'replaced_mode', 'path': 'data.user_input.replaced_mode'},
]

VALID_TRIP_COLS = [
Expand All @@ -24,13 +21,18 @@
"data.distance_meters",
"data.start_loc.coordinates",
"data.end_loc.coordinates",
"data.primary_sensed_mode",
"data.primary_predicted_mode",
"data.primary_ble_sensed_mode",
"user_id"
]

BINARY_TRIP_COLS = [
'user_id',
'data.start_place',
'data.end_place',
"cleaned_section_summary",
"inferred_section_summary",
]

valid_uuids_columns = [
Expand Down Expand Up @@ -91,4 +93,4 @@
'data.local_dt_second',
'data.local_dt_weekday',
'data.local_dt_timezone',
]
]
52 changes: 46 additions & 6 deletions utils/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,56 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str):
ts = esta.TimeSeries.get_aggregate_time_series()
# Note to self, allow end_ts to also be null in the timequery
# we can then remove the start_time, end_time logic
entries = ts.find_entries(
key_list=["analysis/confirmed_trip"],
df = ts.get_data_df("analysis/confirmed_trip",
time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts),
)
df = pd.json_normalize(list(entries))
user_input_cols = []

# logging.debug("Before filtering, df columns are %s" % df.columns)
logging.debug("Before filtering, df columns are %s" % df.columns)
if not df.empty:
columns = [col for col in perm_utils.get_all_trip_columns() if col in df.columns]
# Since we use `get_data_df` instead of `pd.json_normalize`,
# we lose the "data" prefix on the fields and they are only flattened one level
# Here, we restore the prefix for the VALID_TRIP_COLS from constants.py
# for backwards compatibility. We do this for all columns since columns which don't exist are ignored by the rename command.
rename_cols = constants.VALID_TRIP_COLS
# the mapping is `{distance: data.distance, duration: data.duration} etc
rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols))
logging.debug("Rename mapping is %s" % rename_mapping)
df.rename(columns=rename_mapping, inplace=True)
logging.debug("After renaming columns, they are %s" % df.columns)

# Now copy over the coordinates
df['data.start_loc.coordinates'] = df['start_loc'].apply(lambda g: g["coordinates"])
df['data.end_loc.coordinates'] = df['end_loc'].apply(lambda g: g["coordinates"])

# Add primary modes from the sensed, inferred and ble summaries. Note that we do this
# **before** filtering the `all_trip_columns` because the
# *_section_summary columns are not currently valid
get_max_mode_from_summary = lambda md: max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 else "INVALID"
df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply(get_max_mode_from_summary)
df["data.primary_predicted_mode"] = df.inferred_section_summary.apply(get_max_mode_from_summary)
if 'ble_sensed_summary' in df.columns:
df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply(get_max_mode_from_summary)
else:
logging.debug("No BLE support found, not fleet version, ignoring...")

# Expand the user inputs
user_input_df = pd.json_normalize(df.user_input)
df = pd.concat([df, user_input_df], axis='columns')
logging.debug(f"Before filtering {user_input_df.columns=}")
user_input_cols = [c for c in user_input_df.columns
if "metadata" not in c and
"xmlns" not in c and
"local_dt" not in c and
'xmlResponse' not in c and
"_id" not in c]
logging.debug(f"After filtering {user_input_cols=}")

combined_col_list = list(perm_utils.get_all_trip_columns()) + user_input_cols
logging.debug(f"Combined list {combined_col_list=}")
columns = [col for col in combined_col_list if col in df.columns]
df = df[columns]
logging.debug(f"After filtering against the combined list {df.columns=}")
# logging.debug("After getting all columns, they are %s" % df.columns)
for col in constants.BINARY_TRIP_COLS:
if col in df.columns:
Expand Down Expand Up @@ -110,7 +150,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str):
# logging.debug("After filtering, df columns are %s" % df.columns)
# logging.debug("After filtering, the actual data is %s" % df.head())
# logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str)
return df
return (df, user_input_cols)

def query_demographics():
# Returns a dictionary of dataframes where keys represent different survey ids and values are the df for each survey
Expand Down
5 changes: 5 additions & 0 deletions utils/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,18 @@ def get_all_trip_columns():

columns.update(get_required_columns())
# logging.debug("get_all_trip_columns: curr set is %s" % columns)
columns.update(permissions.get('additional_trip_columns', []))
logging.debug("get_all_trip_columns: after additional columns, curr set is %s" % columns)
return columns


def get_allowed_trip_columns():
    """Return the set of trip columns this deployment is allowed to show.

    Starts from the baseline VALID_TRIP_COLS, removes any columns listed in
    the 'data_trips_columns_exclude' permission, then adds any columns from
    'additional_trip_columns' (so an addition wins if a column appears in
    both lists).
    """
    excluded = set(permissions.get("data_trips_columns_exclude", []))
    additional = set(permissions.get("additional_trip_columns", []))
    columns = (set(constants.VALID_TRIP_COLS) - excluded) | additional
    logging.debug("allowed trip columns are %s" % columns)
    return columns


Expand Down

0 comments on commit efccd5a

Please sign in to comment.