From 131d909e5f2a18148232aa06391ad74d7e5babf9 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 14 Jun 2024 13:37:39 -0400 Subject: [PATCH 1/8] expand filters to be able to exclude any subgroup(s) We had a single checkbox to exclude test users, which worked fine for that purpose. But for programs with multiple subgroups, we want to be able to filter by any of the subgroups. I modified this into a multi-select dropdown that lists all subgroups from the config (or just 'test' if the config doesn't define subgroups). I also made the default value reflect metrics.include_test_users from the config. --- app_sidebar_collapsible.py | 51 +++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index f446833..eda3a73 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -25,7 +25,7 @@ from utils.datetime_utils import iso_to_date_only from utils.db_utils import df_to_filtered_records, query_uuids, query_confirmed_trips, query_demographics -from utils.permissions import has_permission +from utils.permissions import has_permission, config import flask_talisman as flt @@ -134,6 +134,8 @@ className="sidebar", ) +subgroups = config.get('opcode', {}).get('subgroups') +include_test_users = config.get('metrics', {}).get('include_test_users') # Global controls including date picker and timezone selector def make_controls(): # according to docs, DatePickerRange will accept YYYY-MM-DD format @@ -161,7 +163,7 @@ def make_controls(): 'border-radius': '3px', 'margin-left': '3px'} ), ], - style={'display': 'flex'}, + style={'display': 'flex', 'margin-left': 'auto'}, ), dbc.Collapse([ html.Div([ @@ -179,21 +181,24 @@ def make_controls(): style={'width': '180px'}, )] ), - - dcc.Checklist( - id='global-filters', - options=[ - {'label': 'Exclude "test" users', - 'value': 'exclude-test-users'}, - ], - value=['exclude-test-users'], - style={'margin-top': '10px'}, - ), ], id='collapse-filters', is_open=False, style={'padding': '5px 15px 10px', 'border': '1px solid #dbdbdb', 'border-top': '0'} ), + html.Div([ + html.Span('Exclude subgroups:'), + dcc.Dropdown( + id='excluded-subgroups', + options=subgroups or ['test'], + value=[] if include_test_users else ['test'], + multi=True, + style={'flex': '1'}, + ), + ], + style={'display': 'flex', 'gap': '5px', + 'align-items': 'center', 'margin-top': '10px'} + ), ], style={'margin': '10px 10px 0 auto', 'width': 'fit-content', @@ -222,7 +227,7 @@ def make_layout(): return html.Div([ dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), - dcc.Store(id='store-excluded-uuids', data={}), # if 'test' users are excluded, a list of their uuids + dcc.Store(id='store-excluded-uuids', data={}), # list of UUIDs from excluded subgroups dcc.Store(id='store-demographics', data={}), dcc.Store(id='store-trajectories', data={}), html.Div(id='page-content', children=make_home_page()), @@ -250,21 +255,21 @@ def toggle_collapse_filters(n, is_open): Input('date-picker', 'start_date'), # these are ISO strings Input('date-picker', 'end_date'), # these are ISO strings Input('date-picker-timezone', 'value'), - Input('global-filters', 'value'), + Input('excluded-subgroups', 'value'), ) -def update_store_uuids(start_date, end_date, timezone, filters): +def update_store_uuids(start_date, end_date, timezone, excluded_subgroups): (start_date, end_date) = iso_to_date_only(start_date, end_date) dff = query_uuids(start_date, end_date, 
timezone) if dff.empty: return {"data": [], "length": 0}, {"data": [], "length": 0} - # if 'exclude-test-users' filter is active, - # exclude any rows with user_token containing 'test', and - # output a list of those excluded UUIDs so other callbacks can exclude them too - if 'exclude-test-users' in filters: - excluded_uuids_list = dff[dff['user_token'].str.contains( - 'test')]['user_id'].tolist() - else: - excluded_uuids_list = [] + + # if any subgroups are excluded, find UUIDs in those subgroups and output + # a list to store-excluded-uuids so that other callbacks can exclude them too + excluded_uuids_list = [] + for subgroup in excluded_subgroups: + uuids_in_subgroup = dff[dff['user_token'].str.contains(f"_{subgroup}_")]['user_id'].tolist() + excluded_uuids_list.extend(uuids_in_subgroup) + records = df_to_filtered_records(dff, 'user_id', excluded_uuids_list) store_uuids = { "data": records, From 29a1bc5fe08f5c73aa72f0039c8782ef8a120550 Mon Sep 17 00:00:00 2001 From: Shankari Date: Sat, 6 Jul 2024 21:20:11 -0700 Subject: [PATCH 2/8] =?UTF-8?q?=E2=9C=A8=20Incorporate=20the=20sensed/infe?= =?UTF-8?q?rred/BLE=20sensed=20primary=20modes=20into=20the=20trip=20table?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My original thought was that this would be a simple 2-3 line change where I would enable the cleaned/inferred mode summaries in the set of valid columns. However, while actually implementing it, I ran into several issues: 1. we display the primary sensed mode in the public dashboard, so we can't just display the summary; we have to extract the primary mode 2. I then switched to using the primary mode computation from the public dashboard, only to discover that it needs the summary as an object. However, the previous implementation of this code used `pd.json_normalize`, which resulted in columns like ``` 'data.inferred_section_summary.distance.TRAIN', 'data.inferred_section_summary.duration.TRAIN', 'data.inferred_section_summary.count.TRAIN', 'data.inferred_section_summary.distance.TRAM', 'data.inferred_section_summary.duration.TRAM', 'data.inferred_section_summary.count.TRAM'], ``` and not the expected ``` 'inferred_section_summary': {'distance': {'CAR': ...}} ``` 3. To work around this, I switched to using `get_data_df`, as we do in the public dashboard. This gave us the correct objects *but* changed the names of the columns so that they no longer had the `data` prefix. This could lead to backwards compat issues with the excluded columns from the configs. ``` "data_trips_columns_exclude": ["data.start_loc.coordinates", "data.end_loc.coordinates"], ``` So the current solution is: - use data_df - rename the columns using a dynamically generated mapper for backwards compat - find the primary mode in the summaries - enable the primary modes as valid columns The rest of the code is untouched. Testing done: See screenshots in PR Note further that the list of valid cols has `data.start_local_dt` and `data.end_local_dt`; however, the actual columns are `end_local_dt_day` etc., so they are not renamed. I double checked, and I am not sure how this ever worked - `pd.json_normalize` also returns ``` data.end_local_dt.year data.end_local_dt.month data.end_local_dt.day data.end_local_dt.hour ``` and I have verified that the columns don't show up on staging. @JGreenlee @louisg1337 here's another area to unify data handling. 
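For illustration, a minimal standalone sketch of the primary-mode extraction described above (this is not the committed code; the function name and sample values are made up, and it assumes a summary object in the second shape, e.g. `{'distance': {'CAR': ...}}`):

```python
# Sketch only: pick the "primary" mode, i.e. the mode covering the most
# distance in a section summary object, falling back to "INVALID" when
# the summary is empty (mirrors the lambda added in this patch).
def primary_mode(summary: dict) -> str:
    distances = summary.get("distance", {})
    if not distances:
        return "INVALID"
    return max(distances, key=distances.get)

# Hypothetical summary in the object form that get_data_df preserves
summary = {
    "distance": {"CAR": 1500.0, "WALKING": 300.0},
    "duration": {"CAR": 120.0, "WALKING": 240.0},
    "count": {"CAR": 1, "WALKING": 2},
}
assert primary_mode(summary) == "CAR"  # CAR covers the most distance
```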
Note finally that I also tried enabling the full summary objects, but they made the table very long and fairly confusing, so I removed them again. They can be enabled by editing `utils/constants.py` again. --- utils/constants.py | 7 ++++++- utils/db_utils.py | 34 ++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index 3d53363..c61653d 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -24,6 +24,9 @@ "data.distance_meters", "data.start_loc.coordinates", "data.end_loc.coordinates", + "data.primary_sensed_mode", + "data.primary_predicted_mode", + "data.primary_ble_sensed_mode", "user_id" ] @@ -31,6 +34,8 @@ 'user_id', 'data.start_place', 'data.end_place', + "cleaned_section_summary", + "inferred_section_summary", ] valid_uuids_columns = [ @@ -91,4 +96,4 @@ 'data.local_dt_second', 'data.local_dt_weekday', 'data.local_dt_timezone', -] \ No newline at end of file +] diff --git a/utils/db_utils.py b/utils/db_utils.py index 1e8aba2..17169fb 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -69,14 +69,40 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery # we can then remove the start_time, end_time logic - entries = ts.find_entries( - key_list=["analysis/confirmed_trip"], + df = ts.get_data_df("analysis/confirmed_trip", time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts), ) - df = pd.json_normalize(list(entries)) - # logging.debug("Before filtering, df columns are %s" % df.columns) + logging.debug("Before filtering, df columns are %s" % df.columns) if not df.empty: + # Since we use `get_data_df` instead of `pd.json_normalize`, + # we lose the "data" prefix on the fields and they are only flattened one level + # Here, we restore the prefix for the VALID_TRIP_COLS from constants.py + # for backwards compatibility. We do this for all columns EXCEPT: + # 1. the coordinates, which we will have to pull out from the geojson anyway + # 2. the user_id, which doesn't need to be copied + # 3. the primary modes, which have not yet been populated + rename_cols = [c for c in constants.VALID_TRIP_COLS if c != "user_id"] + # the mapping is `{distance: data.distance, duration: data.duration}` etc + rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols)) + logging.debug("Rename mapping is %s" % rename_mapping) + df.rename(columns=rename_mapping, inplace=True) + logging.debug("After renaming columns, they are %s" % df.columns) + + # Now copy over the coordinates + df['data.start_loc.coordinates'] = df['start_loc'].apply(lambda g: g["coordinates"]) + df['data.end_loc.coordinates'] = df['end_loc'].apply(lambda g: g["coordinates"]) + + # Add sensed, inferred and ble summaries. 
Note that we do this + # **before** filtering the `all_trip_columns` because the + # *_section_summary columns are not currently valid + get_max_mode_from_summary = lambda md: max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 else "INVALID" + df["data.primary_sensed_mode"] = df.cleaned_section_summary.apply(get_max_mode_from_summary) + df["data.primary_predicted_mode"] = df.inferred_section_summary.apply(get_max_mode_from_summary) + if 'ble_sensed_summary' in df.columns: + df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply(get_max_mode_from_summary) + else: + logging.debug("No BLE support found, not fleet version, ignoring...") columns = [col for col in perm_utils.get_all_trip_columns() if col in df.columns] df = df[columns] # logging.debug("After getting all columns, they are %s" % df.columns) From 55e1464b250a127cccad85f1385fef31b204f57f Mon Sep 17 00:00:00 2001 From: Shankari Date: Sat, 6 Jul 2024 22:27:13 -0700 Subject: [PATCH 3/8] =?UTF-8?q?=F0=9F=90=9B=20=F0=9F=A6=95=20Expand=20the?= =?UTF-8?q?=20user=20inputs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While testing the first commit in this PR (https://github.com/e-mission/op-admin-dashboard/pull/118/commits/29a1bc5fe08f5c73aa72f0039c8782ef8a120550), I found that it displayed the sensed values but *did not* display the confirmed values. This is because of the change to `get_data_df` instead of `json_normalize` - the column names were now different. So the `named_cols` did not add the correct cols and they were not in `VALID_TRIP_COLS` either. Fixed by: - Expanding this field using `json_normalize` This resulted in columns ``` >>> pd.json_normalize(df.user_input) purpose_confirm mode_confirm replaced_mode ... ``` - Concatenating with the existing dataframe - the columns now no longer need to be mapped to user friendly names, so removing them from the named cols - adding them to the valid cols to ensure that they are not stripped out Testing done: See screenshot --- utils/constants.py | 6 +++--- utils/db_utils.py | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index c61653d..d75137c 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -6,9 +6,6 @@ ] MULTILABEL_NAMED_COLS = [ - {'label': 'mode_confirm', 'path': 'data.user_input.mode_confirm'}, - {'label': 'purpose_confirm', 'path': 'data.user_input.purpose_confirm'}, - {'label': 'replaced_mode', 'path': 'data.user_input.replaced_mode'}, ] VALID_TRIP_COLS = [ @@ -27,6 +24,9 @@ "data.primary_sensed_mode", "data.primary_predicted_mode", "data.primary_ble_sensed_mode", + "mode_confirm", + "purpose_confirm", + "replaced_mode", "user_id" ] diff --git a/utils/db_utils.py b/utils/db_utils.py index 17169fb..ca8a302 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -82,7 +82,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # 1. the coordinates, which we will have to pull out from the geojson anyway # 2. the user_id, which doesn't need to be copied # 3. 
the primary modes, which have not yet been populated - rename_cols = [c for c in constants.VALID_TRIP_COLS if c != "user_id"] + rename_cols = constants.VALID_TRIP_COLS # the mapping is `{distance: data.distance, duration: data.duration}` etc rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols)) logging.debug("Rename mapping is %s" % rename_mapping) @@ -103,6 +103,10 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): df["data.primary_ble_sensed_mode"] = df.ble_sensed_summary.apply(get_max_mode_from_summary) else: logging.debug("No BLE support found, not fleet version, ignoring...") + + # Expand the user inputs + df = pd.concat([df, pd.json_normalize(df.user_input)], axis='columns') + columns = [col for col in perm_utils.get_all_trip_columns() if col in df.columns] df = df[columns] # logging.debug("After getting all columns, they are %s" % df.columns) From 1b82c7dfae60845ec061ad5bdf184d2e338b99d9 Mon Sep 17 00:00:00 2001 From: Shankari Date: Sat, 6 Jul 2024 23:13:59 -0700 Subject: [PATCH 4/8] =?UTF-8?q?=F0=9F=94=A7=20Handle=20additional=20column?= =?UTF-8?q?s=20correctly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this, the additional columns were only used in survey (ENKETO) mode. However, we can repurpose them to build on https://github.com/e-mission/op-admin-dashboard/pull/118/commits/29a1bc5fe08f5c73aa72f0039c8782ef8a120550 and allow selected programs to see the summary breakdown. To support this, I had to add the additional columns to both the list of all trip columns (`get_all_trip_columns`) and the displayed columns (`get_allowed_trip_columns`). Note that the first alone is not sufficient: the column would then be passed through the loading stage but would not be displayed. Testing done: - Started up a local config server - Configured the dashboard to use it in docker compose ``` - CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" - STUDY_CONFIG: "stage-program" + CONFIG_PATH: "http://host.docker.internal:9090/configs/" + STUDY_CONFIG: "smart-commute-ebike" ``` Edited the smart-commute json to include an additional column ``` "additional_trip_columns": ["cleaned_section_summary"], ``` Only that column was displayed ``` 2024-07-06 23:19:14 DEBUG:root:Before filtering, df columns are Index(['source', 'end_ts', 'end_fmt_time', 'end_loc', 'raw_trip', 'start_ts', 'start_fmt_time', 'start_loc', 'duration', 'distance', 'start_place', 'end_place', 'cleaned_trip', 'inferred_labels', 'inferred_trip', 'expectation', 'confidence_threshold', 'expected_trip', 'inferred_section_summary', 'cleaned_section_summary', 'user_input', 'additions', 'start_local_dt_year', 'start_local_dt_month', 'start_local_dt_day', 'start_local_dt_hour', 'start_local_dt_minute', 'start_local_dt_second', 'start_local_dt_weekday', 'start_local_dt_timezone', 'end_local_dt_year', 'end_local_dt_month', 'end_local_dt_day', 'end_local_dt_hour', 'end_local_dt_minute', 'end_local_dt_second', 'end_local_dt_weekday', 'end_local_dt_timezone', '_id', 'user_id', 'metadata_write_ts', 'confirmed_place'], dtype='object') 2024-07-06 23:19:14 DEBUG:root:allowed trip columns are {'data.duration', 'replaced_mode', 'data.distance', 'data.distance_km', 'mode_confirm', 'data.distance_meters', 'data.start_local_dt', 'data.duration_seconds', 'data.primary_predicted_mode', 'cleaned_section_summary', 'data.end_fmt_time', 'purpose_confirm', 'data.distance_miles', 
'data.primary_ble_sensed_mode', 'data.start_fmt_time', 'user_id', 'data.primary_sensed_mode', 'data.end_local_dt'} 2024-07-06 23:19:14 DEBUG:root:get_all_trip_columns: after additional columns, curr set is {'data.end_fmt_time', 'purpose_confirm', 'data.distance_miles', 'data.duration', 'replaced_mode', 'data.distance', 'data.start_loc.coordinates', 'data.distance_km', 'mode_confirm', 'data.end_loc.coordinates', 'data.primary_ble_sensed_mode', 'data.start_fmt_time', 'data.distance_meters', 'user_id', 'data.start_local_dt', 'data.duration_seconds', 'data.primary_predicted_mode', 'data.primary_sensed_mode', 'cleaned_section_summary', 'data.end_local_dt'} 2024-07-06 23:19:14 INFO:werkzeug:192.168.65.1 - - [07/Jul/2024 06:19:14] "POST /_dash-update-component HTTP/1.1" 200 - 2024-07-06 23:19:14 DEBUG:root:allowed trip columns are {'data.duration', 'replaced_mode', 'data.distance', 'data.distance_km', 'mode_confirm', 'data.distance_meters', 'data.start_local_dt', 'data.duration_seconds', 'data.primary_predicted_mode', 'cleaned_section_summary', 'data.end_fmt_time', 'purpose_confirm', 'data.distance_miles', 'data.primary_ble_sensed_mode', 'data.start_fmt_time', 'user_id', 'data.primary_sensed_mode', 'data.end_local_dt'} ``` --- utils/permissions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils/permissions.py b/utils/permissions.py index a5304a4..2ae9c9b 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -77,6 +77,8 @@ def get_all_trip_columns(): columns.update(get_required_columns()) # logging.debug("get_all_trip_columns: curr set is %s" % columns) + columns.update(permissions.get('additional_trip_columns', [])) + logging.debug("get_all_trip_columns: after additional columns, curr set is %s" % columns) return columns @@ -84,6 +86,9 @@ def get_allowed_trip_columns(): columns = set(constants.VALID_TRIP_COLS) for column in permissions.get("data_trips_columns_exclude", []): columns.discard(column) + for column in permissions.get("additional_trip_columns", []): + columns.add(column) + logging.debug("allowed trip columns are %s" % columns) return columns From 064d9129b0b17af27b7de114b6678998fa509cb5 Mon Sep 17 00:00:00 2001 From: Shankari Date: Sun, 7 Jul 2024 23:00:23 -0700 Subject: [PATCH 5/8] =?UTF-8?q?=F0=9F=94=A7=20Support=20showing=20trip=20c?= =?UTF-8?q?onfirm=20survey=20results=20in=20the=20trip=20table?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes the list of user input columns depend on the user input results. https://github.com/e-mission/op-admin-dashboard/pull/118#issuecomment-2212737836 - We determine this list of inputs while loading the data, around the time that we concatenate it with the rest of the trip table. - We then filter out columns related to the metadata and the local_dt, the xmlResponse and the binary elements with the `_id` name - We ensure that these columns are not dropped in the queried dataframe - We then plumb this list through by returning it from the `query_confirmed_trips` function and storing it in the store_trips datastore - We then ensure that the columns are not dropped before display either The result is that the trip specific survey results are displayed in the trips table. This is true whether the input is labels or surveys. This ensures that the GSA GPG project can see survey results in the trip table as well. Testing done: - Ran the admin dashboard on a dataset that had mixed trip confirm surveys and labels. Both were shown in the trip table. 
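As a rough sketch of the column filtering described above (the substring checks mirror the diff below; the sample column names here are hypothetical stand-ins for the real expanded user-input columns):

```python
import pandas as pd

# Hypothetical expanded user-input columns; in the dashboard these come
# from pd.json_normalize(df.user_input)
user_input_df = pd.DataFrame(columns=[
    'mode_confirm',
    'trip_user_input.metadata.write_ts',         # dropped: contains "metadata"
    'trip_user_input.data.start_local_dt.year',  # dropped: contains "local_dt"
    'trip_user_input.data.xmlResponse',          # dropped: contains "xmlResponse"
    'trip_user_input._id',                       # dropped: contains "_id"
    'trip_user_input.data.jsonDocResponse.data.travel_mode',
])

# Keep only the columns worth displaying in the trips table
user_input_cols = [c for c in user_input_df.columns
                   if "metadata" not in c and
                   "xmlns" not in c and
                   "local_dt" not in c and
                   'xmlResponse' not in c and
                   "_id" not in c]
assert user_input_cols == ['mode_confirm',
                           'trip_user_input.data.jsonDocResponse.data.travel_mode']
```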
``` DEBUG:root:Before filtering user_input_df.columns=Index(['mode_confirm', 'purpose_confirm', 'trip_user_input._id', 'trip_user_input.user_id', 'trip_user_input.metadata.key', 'trip_user_input.metadata.platform', 'trip_user_input.metadata.read_ts', 'trip_user_input.metadata.time_zone', 'trip_user_input.metadata.type', 'trip_user_input.metadata.write_ts', 'trip_user_input.metadata.write_local_dt.year', 'trip_user_input.metadata.write_local_dt.month', 'trip_user_input.metadata.write_local_dt.day', 'trip_user_input.metadata.write_local_dt.hour', 'trip_user_input.metadata.write_local_dt.minute', 'trip_user_input.metadata.write_local_dt.second', 'trip_user_input.metadata.write_local_dt.weekday', 'trip_user_input.metadata.write_local_dt.timezone', 'trip_user_input.metadata.write_fmt_time', 'trip_user_input.data.label', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.xmlResponse', 'trip_user_input.data.jsonDocResponse.data.attr.xmlns:jr', 'trip_user_input.data.jsonDocResponse.data.attr.xmlns:odk', 'trip_user_input.data.jsonDocResponse.data.attr.xmlns:orx', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'trip_user_input.data.jsonDocResponse.data.start', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 'trip_user_input.data.jsonDocResponse.data.travel_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.start_ts', 'trip_user_input.data.end_ts', 'trip_user_input.data.match_id', 'trip_user_input.data.start_local_dt.year', 'trip_user_input.data.start_local_dt.month', 'trip_user_input.data.start_local_dt.day', 'trip_user_input.data.start_local_dt.hour', 'trip_user_input.data.start_local_dt.minute', 'trip_user_input.data.start_local_dt.second', 'trip_user_input.data.start_local_dt.weekday', 'trip_user_input.data.start_local_dt.timezone', 'trip_user_input.data.start_fmt_time', 'trip_user_input.data.end_local_dt.year', 'trip_user_input.data.end_local_dt.month', 'trip_user_input.data.end_local_dt.day', 'trip_user_input.data.end_local_dt.hour', 'trip_user_input.data.end_local_dt.minute', 'trip_user_input.data.end_local_dt.second', 'trip_user_input.data.end_local_dt.weekday', 'trip_user_input.data.end_local_dt.timezone', 'trip_user_input.data.end_fmt_time', 'trip_user_input.metadata.plugin', 'trip_user_input.data.jsonDocResponse.data.attrxmlns:jr', 'trip_user_input.data.jsonDocResponse.data.attrxmlns:odk', 'trip_user_input.data.jsonDocResponse.data.attrxmlns:orx', 'trip_user_input.data.jsonDocResponse.data.attrid', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID'], dtype='object') DEBUG:root:After filtering user_input_cols=['mode_confirm', 'purpose_confirm', 'trip_user_input.data.label', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'trip_user_input.data.jsonDocResponse.data.start', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 
'trip_user_input.data.jsonDocResponse.data.travel_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.start_ts', 'trip_user_input.data.end_ts', 'trip_user_input.data.start_fmt_time', 'trip_user_input.data.end_fmt_time', 'trip_user_input.data.jsonDocResponse.data.attrid', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID'] DEBUG:root:allowed trip columns are {'data.end_fmt_time', 'data.start_fmt_time', 'data.primary_sensed_mode', 'data.primary_ble_sensed_mode', 'data.distance_miles', 'data.distance_km', 'user_id', 'data.distance_meters', 'data.duration_seconds', 'data.start_local_dt', 'data.distance', 'data.end_local_dt', 'data.primary_predicted_mode', 'data.duration'} DEBUG:root:get_all_trip_columns: after additional columns, curr set is {'data.end_fmt_time', 'data.end_loc.coordinates', 'data.distance_km', 'data.start_local_dt', 'data.start_fmt_time', 'data.primary_predicted_mode', 'data.primary_ble_sensed_mode', 'data.primary_sensed_mode', 'data.start_loc.coordinates', 'data.distance_miles', 'user_id', 'data.distance_meters', 'data.duration_seconds', 'data.distance', 'data.end_local_dt', 'data.duration'} DEBUG:root:Combined list combined_col_list=['data.end_fmt_time', 'data.end_loc.coordinates', 'data.distance_km', 'data.start_local_dt', 'data.start_fmt_time', 'data.primary_predicted_mode', 'data.primary_ble_sensed_mode', 'data.primary_sensed_mode', 'data.start_loc.coordinates', 'data.distance_miles', 'user_id', 'data.distance_meters', 'data.duration_seconds', 'data.distance', 'data.end_local_dt', 'data.duration', 'mode_confirm', 'purpose_confirm', 'trip_user_input.data.label', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'trip_user_input.data.jsonDocResponse.data.start', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 'trip_user_input.data.jsonDocResponse.data.travel_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.start_ts', 'trip_user_input.data.end_ts', 'trip_user_input.data.start_fmt_time', 'trip_user_input.data.end_fmt_time', 'trip_user_input.data.jsonDocResponse.data.attrid', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID'] DEBUG:root:After filtering against the combined list df.columns=Index(['data.end_fmt_time', 'data.end_loc.coordinates', 'data.start_fmt_time', 'data.primary_predicted_mode', 'data.primary_sensed_mode', 'data.start_loc.coordinates', 'user_id', 
'data.distance', 'data.duration', 'mode_confirm', 'purpose_confirm', 'trip_user_input.data.label', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'trip_user_input.data.jsonDocResponse.data.start', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 'trip_user_input.data.jsonDocResponse.data.travel_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.start_ts', 'trip_user_input.data.end_ts', 'trip_user_input.data.start_fmt_time', 'trip_user_input.data.end_fmt_time', 'trip_user_input.data.jsonDocResponse.data.attrid', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID'], dtype='object') INFO:werkzeug:192.168.65.1 - - [08/Jul/2024 05:56:22] "GET /_reload-hash HTTP/1.1" 200 - INFO:werkzeug:192.168.65.1 - - [08/Jul/2024 05:56:22] "POST /_dash-update-component HTTP/1.1" 200 - DEBUG:root:allowed trip columns are {'data.end_fmt_time', 'data.start_fmt_time', 'data.primary_sensed_mode', 'data.primary_ble_sensed_mode', 'data.distance_miles', 'data.distance_km', 'user_id', 'data.distance_meters', 'data.duration_seconds', 'data.start_local_dt', 'data.distance', 'data.end_local_dt', 'data.primary_predicted_mode', 'data.duration'} DEBUG:root:Final list of retained cols columns={'trip_user_input.data.jsonDocResponse.data.attrid', 'trip_user_input.data.start_ts', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 'trip_user_input.data.jsonDocResponse.data.travel_mode', 'data.start_local_dt', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'data.primary_predicted_mode', 'trip_user_input.data.label', 'data.primary_sensed_mode', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'user_id', 'data.distance_meters', 'data.duration_seconds', 'trip_user_input.data.start_fmt_time', 'data.duration', 'data.end_fmt_time', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID', 'data.distance_km', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.end_fmt_time', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'data.start_fmt_time', 'mode_confirm', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.end_ts', 'data.primary_ble_sensed_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'data.distance_miles', 'data.distance', 'trip_user_input.data.jsonDocResponse.data.start', 'data.end_local_dt', 'purpose_confirm', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD'} DEBUG:root:Before dropping, df.columns=Index(['data.end_fmt_time', 'data.end_loc.coordinates', 'data.start_fmt_time', 'data.primary_predicted_mode', 
'data.primary_sensed_mode', 'data.start_loc.coordinates', 'user_id', 'data.distance', 'data.duration', 'mode_confirm', 'purpose_confirm', 'trip_user_input.data.label', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'trip_user_input.data.jsonDocResponse.data.start', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 'trip_user_input.data.jsonDocResponse.data.travel_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.start_ts', 'trip_user_input.data.end_ts', 'trip_user_input.data.start_fmt_time', 'trip_user_input.data.end_fmt_time', 'trip_user_input.data.jsonDocResponse.data.attrid', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID', 'trip_start_time_str', 'trip_end_time_str', 'start_coordinates', 'end_coordinates', 'data.distance_meters', 'data.distance_km', 'data.duration_seconds', 'data.distance_miles'], dtype='object') DEBUG:root:After dropping, df.columns=Index(['data.end_fmt_time', 'data.start_fmt_time', 'data.primary_predicted_mode', 'data.primary_sensed_mode', 'user_id', 'data.distance', 'data.duration', 'mode_confirm', 'purpose_confirm', 'trip_user_input.data.label', 'trip_user_input.data.name', 'trip_user_input.data.version', 'trip_user_input.data.jsonDocResponse.data.attr.id', 'trip_user_input.data.jsonDocResponse.data.start', 'trip_user_input.data.jsonDocResponse.data.end', 'trip_user_input.data.jsonDocResponse.data.destination_purpose', 'trip_user_input.data.jsonDocResponse.data.travel_mode', 'trip_user_input.data.jsonDocResponse.data.Total_people_in_trip_party', 'trip_user_input.data.jsonDocResponse.data.Non_household_member_s_on_trip', 'trip_user_input.data.jsonDocResponse.data.Vehicle_trip_Parking_location', 'trip_user_input.data.jsonDocResponse.data.Parking_cost', 'trip_user_input.data.jsonDocResponse.data.Total_toll_charges_p_uring_the_trip_AUD', 'trip_user_input.data.jsonDocResponse.data.Transit_fees_AUD', 'trip_user_input.data.jsonDocResponse.data.Taxi_fees', 'trip_user_input.data.jsonDocResponse.data.meta.instanceID', 'trip_user_input.data.start_ts', 'trip_user_input.data.end_ts', 'trip_user_input.data.start_fmt_time', 'trip_user_input.data.end_fmt_time', 'trip_user_input.data.jsonDocResponse.data.attrid', 'replaced_mode', 'trip_user_input.data.jsonDocResponse.data.meta.deprecatedID', 'data.distance_meters', 'data.distance_km', 'data.duration_seconds', 'data.distance_miles'], dtype='object') ``` --- app_sidebar_collapsible.py | 3 ++- pages/data.py | 4 ++++ utils/constants.py | 3 --- utils/db_utils.py | 21 +++++++++++++++++---- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index eda3a73..9bd342a 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -311,12 +311,13 @@ def update_store_demographics(start_date, end_date, timezone, excluded_uuids): ) def update_store_trips(start_date, end_date, timezone, 
excluded_uuids): (start_date, end_date) = iso_to_date_only(start_date, end_date) - df = query_confirmed_trips(start_date, end_date, timezone) + df, user_input_cols = query_confirmed_trips(start_date, end_date, timezone) records = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) # logging.debug("returning records %s" % records[0:2]) store = { "data": records, "length": len(records), + "userinputcols": user_input_cols } return store diff --git a/pages/data.py b/pages/data.py index 27b4e60..f658202 100644 --- a/pages/data.py +++ b/pages/data.py @@ -74,12 +74,16 @@ def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_de columns.update( col['label'] for col in perm_utils.get_allowed_named_trip_columns() ) + columns.update(store_trips["userinputcols"]) has_perm = perm_utils.has_permission('data_trips') df = pd.DataFrame(data) if df.empty or not has_perm: return None + logging.debug(f"Final list of retained cols {columns=}") + logging.debug(f"Before dropping, {df.columns=}") df = df.drop(columns=[col for col in df.columns if col not in columns]) + logging.debug(f"After dropping, {df.columns=}") df = clean_location_data(df) trips_table = populate_datatable(df,'trips-table') diff --git a/utils/constants.py b/utils/constants.py index d75137c..877c020 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -24,9 +24,6 @@ "data.primary_sensed_mode", "data.primary_predicted_mode", "data.primary_ble_sensed_mode", - "mode_confirm", - "purpose_confirm", - "replaced_mode", "user_id" ] diff --git a/utils/db_utils.py b/utils/db_utils.py index ca8a302..cff9cb5 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -72,6 +72,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): df = ts.get_data_df("analysis/confirmed_trip", time_query=estt.TimeQuery("data.start_ts", start_ts, end_ts), ) + user_input_cols = [] logging.debug("Before filtering, df columns are %s" % df.columns) if not df.empty: @@ -105,10 +106,22 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): logging.debug("No BLE support found, not fleet version, ignoring...") # Expand the user inputs - df = pd.concat([df, pd.json_normalize(df.user_input)], axis='columns') - - columns = [col for col in perm_utils.get_all_trip_columns() if col in df.columns] + user_input_df = pd.json_normalize(df.user_input) + df = pd.concat([df, user_input_df], axis='columns') + logging.debug(f"Before filtering {user_input_df.columns=}") + user_input_cols = [c for c in user_input_df.columns + if "metadata" not in c and + "xmlns" not in c and + "local_dt" not in c and + 'xmlResponse' not in c and + "_id" not in c] + logging.debug(f"After filtering {user_input_cols=}") + + combined_col_list = list(perm_utils.get_all_trip_columns()) + user_input_cols + logging.debug(f"Combined list {combined_col_list=}") + columns = [col for col in combined_col_list if col in df.columns] df = df[columns] + logging.debug(f"After filtering against the combined list {df.columns=}") # logging.debug("After getting all columns, they are %s" % df.columns) for col in constants.BINARY_TRIP_COLS: if col in df.columns: @@ -140,7 +153,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # logging.debug("After filtering, df columns are %s" % df.columns) # logging.debug("After filtering, the actual data is %s" % df.head()) # logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str) - return df + return (df, user_input_cols) def query_demographics(): # Returns dictionary of df 
where keys represent different survey ids and values are the df for each survey From 0de3ccb54ee76fef78acc35d20300c36707e2597 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Mon, 8 Jul 2024 11:46:45 -0700 Subject: [PATCH 6/8] Clarify/fix comments --- utils/db_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index cff9cb5..555ed5d 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -79,10 +79,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # Since we use `get_data_df` instead of `pd.json_normalize`, # we lose the "data" prefix on the fields and they are only flattened one level # Here, we restore the prefix for the VALID_TRIP_COLS from constants.py - # for backwards compatibility. We do this for all columns EXCEPT: - # 1. the coordinates, which we will have to pull out from the geojson anyway - # 2. the user_id, which doesn't need to be copied - # 3. the primary modes, which have not yet been populated + # for backwards compatibility. We do this for all columns, since columns which don't exist are ignored by the rename command. rename_cols = constants.VALID_TRIP_COLS # the mapping is `{distance: data.distance, duration: data.duration}` etc rename_mapping = dict(zip([c.replace("data.", "") for c in rename_cols], rename_cols)) @@ -94,7 +91,7 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): df['data.start_loc.coordinates'] = df['start_loc'].apply(lambda g: g["coordinates"]) df['data.end_loc.coordinates'] = df['end_loc'].apply(lambda g: g["coordinates"]) - # Add sensed, inferred and ble summaries. Note that we do this + # Add primary modes from the sensed, inferred and ble summaries. Note that we do this # **before** filtering the `all_trip_columns` because the # *_section_summary columns are not currently valid get_max_mode_from_summary = lambda md: max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 else "INVALID" From f49a4354f78f49355e518f0f7e5464cf262bc544 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Mon, 8 Jul 2024 17:44:00 -0700 Subject: [PATCH 7/8] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20up=20versions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 050e580..cd63a83 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2024-04-15--53-23 +FROM shankari/e-mission-server:master_2024-06-25--47-25 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From c6d150758c538e8d82de6c702a156b0b818c207b Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Thu, 18 Jul 2024 18:23:28 -0700 Subject: [PATCH 8/8] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20up=20the=20base?= =?UTF-8?q?=20image?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index cd63a83..188b1ac 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2024-06-25--47-25 +FROM shankari/e-mission-server:master_2024-07-19--34-43 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050