From aa338d396790a7864833969f14f9d0144ea202b0 Mon Sep 17 00:00:00 2001 From: Shelby Potts Date: Sat, 22 Feb 2025 22:35:56 -0600 Subject: [PATCH 1/2] 3 new cards on the user page --- .env_example | 3 +- .gitignore | 3 +- app/analytics/UserAnalytics.py | 116 ++++++++++++------ app/app.py | 76 ++++++------ app/pages/UserPage.py | 164 ++++++++++++++++---------- tests/analytics/test_UserAnalytics.py | 45 ++++++- 6 files changed, 264 insertions(+), 143 deletions(-) diff --git a/.env_example b/.env_example index 6e042bc..23cd904 100644 --- a/.env_example +++ b/.env_example @@ -1,3 +1,4 @@ HOST=0.0.0.0 PORT=8050 -USER_FILE_PATH='file/path/user.json' \ No newline at end of file +USER_FILE_PATH='file/path/user.json' +ASSETS_PATH='app/assets/' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4e7ff0b..2f0c8e5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ data/ ./.pytest_cache .env .vscode -.pytest_cache \ No newline at end of file +.pytest_cache +app/assets/ \ No newline at end of file diff --git a/app/analytics/UserAnalytics.py b/app/analytics/UserAnalytics.py index 3f6f9ac..9251d98 100644 --- a/app/analytics/UserAnalytics.py +++ b/app/analytics/UserAnalytics.py @@ -1,10 +1,10 @@ -# from ip2geotools.databases.noncommercial import DbIpCity +from datetime import datetime import json -import pandas as pd import os class UserAnalytics: def __init__(self): + self.assets_path = os.environ.get("ASSETS_PATH") self.user_file_path = os.environ.get("USER_FILE_PATH") if self.user_file_path is None: raise Exception("USER_FILE_PATH environment variable is not set.") @@ -16,6 +16,11 @@ def __init__(self): user_data = json.load(file) self.user_data = user_data + + def get_media_file_paths(self): + assets_dir = self.assets_path + jpg_files = [f for f in os.listdir(assets_dir) if f.endswith(".jpg")] + return jpg_files def get_account_data(self): return self.user_data["account"] @@ -32,38 +37,77 @@ def get_preferences_data(self): def get_location_data(self): 
return self.user_data["location"] + def build_user_location_dict(self): + location = self.get_location_data() + user_location = {} + + user_location["city"] = location["sublocality"] + user_location["latitude"] = location["latitude"] + user_location["longitude"] = location["longitude"] + user_location["country"] = location["country"] + user_location["neighborhood"] = location["neighborhood"] + user_location["locality"] = location["locality"] + + return user_location + + def build_user_summary_dict(self): + profile_data = self.get_profile_data() + account_data = self.get_account_data() + user_summary = {} + + # get profile data + user_summary["first_name"] = profile_data["first_name"] + user_summary["age"] = profile_data["age"] + # convert height in cm to inches and ft + feet, inches = _convert_height(profile_data["height_centimeters"]) + user_summary["height_feet"] = feet + user_summary["height_inches"] = inches + user_summary["gender"] = profile_data["gender"] + user_summary["ethnicities"] = profile_data["ethnicities"] + user_summary["religions"] = profile_data["religions"] + user_summary["job_title"] = profile_data["job_title"] + user_summary["workplaces"] = profile_data["workplaces"] + user_summary["education_attained"] = profile_data["education_attained"] + user_summary["hometowns"] = profile_data["hometowns"] + user_summary["languages_spoken"] = profile_data["languages_spoken"] + user_summary["politics"] = profile_data["politics"] + user_summary["pets"] = profile_data["pets"] + user_summary["relationship_types"] = profile_data["relationship_types"] + user_summary["dating_intention"] = profile_data["dating_intention"] + + # capture duration paused and on app time + user_summary["last_pause_duration"] = _timestamp_durations( + leading_timestamp=account_data["last_unpause_time"], + lagging_timestamp=account_data["last_pause_time"]) + + user_summary["on_app_duration"] = _timestamp_durations( + leading_timestamp=account_data["last_seen"], + 
lagging_timestamp=account_data["signup_time"], + lag_has_microseconds=True) + + return user_summary + +def _convert_height(cm): + inches = cm / 2.54 + feet = int(inches // 12) # whole feet + # remaining inches, rounded to 1 decimal place + remaining_inches = round(inches % 12, 1) + + return feet, remaining_inches + +def _timestamp_durations(leading_timestamp, lagging_timestamp, lag_has_microseconds=False): + lead_dt_format = "%Y-%m-%d %H:%M:%S" + lag_dt_format = lead_dt_format + + # the signup_time contains microseconds, so this handles that special format + if lag_has_microseconds: + lag_dt_format = "%Y-%m-%d %H:%M:%S.%f" + + # parse timestamps + lag_time = datetime.strptime(lagging_timestamp, lag_dt_format) + lead_time = datetime.strptime(leading_timestamp, lead_dt_format) + + # calculate difference in days + days_difference = (lead_time - lag_time).days -# def parse_user_ip_addresses(file_path='data/export/user.json'): -# """ -# Parses the IP addresses out of the user data and gets latitude and longitude coordinates from the IP addresses. -# This is only grabbing a subset of the IP addresses because the full set of data takes too long. 
-# :return: a DataFrame with latitude and longitude coordinates -# """ -# json_file_path = file_path - -# # opening json file -# with open(json_file_path, 'r') as file: -# # raw data is a list of dictionaries "list of interactions with a person" -# raw_data = json.load(file) - -# device_value = [] -# # parse just the device records -# if 'devices' in raw_data: -# values = raw_data['devices'] -# device_value = values - -# # extract the IP addresses -# ip_addresses = [entry['ip_address'] for entry in device_value] - -# lats = [] -# longs = [] -# # lookup the latitude and longitude coordinates of each IP address -# # TODO: this API call doesn't work super well, replace it -# # for ip in ip_addresses[:100]: -# # coord = DbIpCity.get(ip, api_key="free") -# # lats.append(coord.latitude) -# # longs.append(coord.longitude) - -# # define column names and create a DataFrame -# coordinates = pd.DataFrame({'latitude': lats, 'longitude': longs}) -# return coordinates + return days_difference \ No newline at end of file diff --git a/app/app.py b/app/app.py index 97425c4..0d110e4 100644 --- a/app/app.py +++ b/app/app.py @@ -79,38 +79,38 @@ dmc.Space(h=30)]), # section for uploading files - html.Div([ - dmc.Text("Upload Files", style={"fontSize": 28}, weight=500), - dmc.Text("Upload the `matches.json` and the `user.json` files from the zipped Hinge export for analysis."), - dmc.Space(h=20), - dcc.Upload( - id='upload-data', - children=html.Div([ - 'Drag and Drop or ', - html.A('Select Files') - ]), - style={ - 'width': '100%', - 'height': '60px', - 'lineHeight': '60px', - 'borderWidth': '1px', - 'borderStyle': 'dashed', - 'borderRadius': '5px', - 'textAlign': 'center', - 'margin': '10px', - "fontSize": 20, - 'font-family': "Open Sans, verdana, arial, sans-serif" - }, - # Allow multiple files to be uploaded - multiple=True - ), - html.Div(id='output-data-upload') - ]), + # html.Div([ + # dmc.Text("Upload Files", style={"fontSize": 28}, weight=500), + # dmc.Text("Upload the 
`matches.json` and the `user.json` files from the zipped Hinge export for analysis."), + # dmc.Space(h=20), + # dcc.Upload( + # id='upload-data', + # children=html.Div([ + # 'Drag and Drop or ', + # html.A('Select Files') + # ]), + # style={ + # 'width': '100%', + # 'height': '60px', + # 'lineHeight': '60px', + # 'borderWidth': '1px', + # 'borderStyle': 'dashed', + # 'borderRadius': '5px', + # 'textAlign': 'center', + # 'margin': '10px', + # "fontSize": 20, + # 'font-family': "Open Sans, verdana, arial, sans-serif" + # }, + # # Allow multiple files to be uploaded + # multiple=True + # ), + # html.Div(id='output-data-upload') + # ]), # show links to the other pages dmc.Text("Data Insights", style={"fontSize": 28}, weight=500), - dmc.Text("After uploading your data files, you can click on the page links below to see insights " - "from the data provided by Hinge."), + # dmc.Text("After uploading your data files, you can click on the page links below to see insights " + # "from the data provided by Hinge."), dmc.Space(h=10), html.Div([ html.Div( @@ -146,19 +146,19 @@ def parse_uploaded_file_contents(list_of_file_contents, list_of_file_names): ]) -@callback(Output('output-data-upload', 'children'), - Input('upload-data', 'contents'), - State('upload-data', 'filename')) -def update_output(list_of_contents, list_of_names): - if list_of_contents is not None: - children = [ - parse_uploaded_file_contents(list_of_contents, list_of_names)] - return children +# @callback(Output('output-data-upload', 'children'), +# Input('upload-data', 'contents'), +# State('upload-data', 'filename')) +# def update_output(list_of_contents, list_of_names): +# if list_of_contents is not None: +# children = [ +# parse_uploaded_file_contents(list_of_contents, list_of_names)] +# return children if __name__ == '__main__': host = os.environ.get("HOST") port = int(os.environ.get("PORT", 8050)) - logger.info(f"Starting the Hinge Data Analysis app on {host}:{port}...") + logger.info(f"Running the Hinge 
Data Analysis app on {host}:{port}...") app.run(debug=True, host=host, port=port) \ No newline at end of file diff --git a/app/pages/UserPage.py b/app/pages/UserPage.py index 7ac4f61..b64c4f3 100644 --- a/app/pages/UserPage.py +++ b/app/pages/UserPage.py @@ -1,76 +1,118 @@ -from dash import html +from dash import html, dcc, callback import dash_mantine_components as dmc -from dash import dcc, Input, Output, callback, dash_table import plotly.express as px -from dash.exceptions import PreventUpdate +from dash.dependencies import Input, Output, State from analytics.UserAnalytics import UserAnalytics -user_analytics = UserAnalytics() - - -layout = html.Div([ - html.Button('Reload Graphs', id='refresh-page', - style={"fontSize": 16, 'font-family': "Open Sans, verdana, arial, sans-serif"}), - dmc.Space(h=20), - dmc.Text("User Analytics", align="center", style={"fontSize": 28}, weight=500), - dmc.Text("This section contains insights about your user data that was collected while you were using Hinge."), - dmc.Space(h=20), - - # table showing account data - dmc.Text("User Account Info", size="xl", align="left", weight=500), - dmc.Text("This table shows the account data that was collected while you were using Hinge. This includes data " - "about when you downloaded the app, the last time you paused or unpaused the app, and the last time " - "you logged in.", align="left"), - dmc.Space(h=10), - html.Div([ - dash_table.DataTable(id='datatable-interactivity'), - html.Div(id='acct-datatable-interactivity-container'), - ]), - - dmc.Space(h=20), - # user latitude and longitude coordinates - dmc.Text("Where you've used the app", size="xl", align="left", weight=500), - dmc.Text("This takes the public IP addresses from the sessions where you used Hinge and uses that to look up the " - "latitude and longitude coordinates to show where you were when you were using the app. 
This is limited " - "to 100 sessions.", align="left"), - # TODO: figure out what to do with this map because it's god awful to run - dcc.Graph("live-update-coords-graph"), -]) +def user_photo_slideshow(): + jpg_files = UserAnalytics().get_media_file_paths() + print(jpg_files) + return dmc.Card( + children=[ + dmc.Text("Photos", weight=750, size="lg"), + dmc.Space(h=10), + html.Img(id="slideshow-image", style={"width": "100%", "borderRadius": "10px"}), # Image placeholder + dcc.Interval(id="interval-component", interval=10000, n_intervals=0), + dcc.Store(id="image-store", data=jpg_files) # Store images + ], + withBorder=True, + shadow="sm", + radius="md", + style={"width": "500px", "padding": "20px"}, + ) @callback( - Output('acct-datatable-interactivity-container', 'children'), - [Input('refresh-page', 'n_clicks')] + Output("slideshow-image", "src"), + Input("interval-component", "n_intervals"), + State("image-store", "data") # Get images dynamically from Store ) -def update_comment_table(data): - __check_for_live_update_data(data) +def update_image(n_intervals, jpg_files): + # NOTE: images have to be in an "assets" directory in the same folder as the app.py file + return f"assets/{jpg_files[n_intervals % len(jpg_files)]}" # Use relative path with /assets/ - account_data = user_analytics.get_account_data() - # passing in the account data as a list for the DataTable - return [ - dash_table.DataTable(data=[account_data], page_size=5, - style_cell={'textAlign': 'left'}) - ] +def create_user_location_card(): + user_location = UserAnalytics().build_user_location_dict() -# TODO: commenting this out until there is an alternative -# @callback( -# Output('live-update-coords-graph', 'figure'), -# [Input('refresh-page', 'n_clicks')] -# ) -# def update_coords_graph_live(data): -# __check_for_live_update_data(data) + fig = px.scatter_mapbox( + lat=[user_location["latitude"]], + lon=[user_location["longitude"]], + hover_name=[user_location["city"]], + zoom=10, + height=400 + ) 
-# # initial setup of the global events -# user_coordinates = ua.parse_user_ip_addresses() -# # create the funnel graph -# figure = px.scatter_geo(user_coordinates, locationmode="USA-states", lat="latitude", lon="longitude", -# projection="orthographic") -# return figure + # use Mapbox for styling + fig.update_layout( + mapbox_style="carto-positron", + mapbox_center={"lat": user_location["latitude"], "lon": user_location["longitude"]} + ) + return dmc.Card( + children=[ + dmc.Space(h=10), + dmc.Text("Location", weight=700, size="xl"), + dmc.Text(f"Country: {user_location['country']}", size="lg"), + dmc.Text(f"Locality: {user_location['locality']}", size="lg"), + dmc.Text(f"City: {user_location['city']}", size="lg"), + dmc.Text(f"Neighborhood: {user_location['neighborhood']}", size="lg"), + dmc.Space(h=10), + dcc.Graph(figure=fig) # map visualization + ], + withBorder=True, + shadow="sm", + radius="md", + style={"width": "500px", "height": "520px"}, + ) -# TODO: I don't like this this is repeated in both files, consolidate at some point -def __check_for_live_update_data(data): - if data is None: - raise PreventUpdate \ No newline at end of file +def create_user_summary_card(): + user_summary = UserAnalytics().build_user_summary_dict() + + return dmc.Card( + children=[ + dmc.Text(f"{user_summary['first_name']}", weight=750, size="xl"), + dmc.Text(f"Age: {user_summary['age']}", size="lg", color="dimmed"), + dmc.Text(f"Height (ft, in): {user_summary['height_feet'], user_summary['height_inches']}", size="lg", color="dimmed"), + dmc.Text(f"Gender: {user_summary['gender']}", size="lg", color="dimmed"), + dmc.Text(f"Ethnicities: {user_summary['ethnicities']}", size="lg"), + dmc.Text(f"Religions: {user_summary['religions']}", size="lg"), + dmc.Text(f"Job: {user_summary['job_title']}", size="lg"), + dmc.Text(f"Workplaces: {user_summary['workplaces']}", size="lg"), + dmc.Text(f"Education: {user_summary['education_attained']}", size="lg"), + dmc.Text(f"Hometown: 
{user_summary['hometowns']}", size="lg"), + dmc.Text(f"Politics: {user_summary['politics']}", size="lg"), + dmc.Text(f"Pets: {user_summary['pets']}", size="lg"), + dmc.Text(f"Relationship Types: {user_summary['relationship_types']}", size="lg"), + dmc.Text(f"Dating Intentions: {user_summary['dating_intention']}", size="lg"), + dmc.Text(f"Last Pause Duration: {user_summary['last_pause_duration']} days", size="lg"), + dmc.Text(f"On App Duration: {user_summary['on_app_duration']} days", size="lg"), + ], + withBorder=True, + shadow="sm", + radius="md", + style={"width": "500px", "padding": "20px", "height": "520px"}, + ) + +layout = html.Div([ + dmc.Text("User Analytics", align="center", style={"fontSize": 28}, weight=500), + dmc.Space(h=20), + dmc.Grid( + children=[ + dmc.Col( + user_photo_slideshow(), + span=4 + ), + dmc.Col( + create_user_summary_card(), + span=4, + ), + dmc.Col( + create_user_location_card(), + span=4 + ) + ], + style={"height": "50vh"} +) +]) diff --git a/tests/analytics/test_UserAnalytics.py b/tests/analytics/test_UserAnalytics.py index 7b817bf..06d3a15 100644 --- a/tests/analytics/test_UserAnalytics.py +++ b/tests/analytics/test_UserAnalytics.py @@ -20,9 +20,9 @@ } ], "account": { - "signup_time": "2001-06-29 03:27:17.539", - "last_pause_time": "2003-09-04 03:04:32", - "last_unpause_time": "2020-12-10 16:53:40", + "signup_time": "2024-01-01 03:27:17.539", + "last_pause_time": "2020-09-04 03:04:32", + "last_unpause_time": "2020-09-10 16:53:40", "last_seen": "2024-01-17 04:07:39", "device_platform": "ios", "device_os": "16.6.1", @@ -33,7 +33,13 @@ "profile": { "first_name": "Fake User", "age": 99, - "height_centimeters": 213 + "height_centimeters": 213, + "gender": "female", + "job_title": "Astronaut", + "education_attained": "Undergraduate", + "languages_spoken": "English", + "pets": "Dog", + "politics": "Prefer Not to Say" }, "preferences": { "distance_miles_max": 50, @@ -43,7 +49,9 @@ "location": { "latitude": 65.00, "longitude": 18.00, - 
"country": "Iceland" + "country": "Iceland", + "sublocality": "Brooklyn", + "neighborhood": "Flatbush", } } ''' @@ -113,4 +121,29 @@ def test_location_data(user_analytics): locations = user_analytics.get_location_data() assert locations["latitude"] == 65.00 assert locations["longitude"] == 18.00 - assert locations["country"] == "Iceland" \ No newline at end of file + assert locations["country"] == "Iceland" + +def test_build_user_summary_dict(user_analytics): + result = user_analytics.build_user_summary_dict() + assert result["first_name"] == "Fake User" + assert result["age"] == 99 + assert result["height_feet"] == 6 + assert result["height_inches"] == 11.9 + assert result["gender"] == "female" + assert result["job_title"] == "Astronaut" + assert result["education_attained"] == "Undergraduate" + assert result["languages_spoken"] == "English" + assert result["politics"] == "Prefer Not to Say" + assert result["pets"] == "Dog" + assert result["last_pause_duration"] == 6 + assert result["on_app_duration"] == 16 + +def test_build_user_location_dict(user_analytics): + result = user_analytics.build_user_location_dict() + assert result["city"] == "Brooklyn" + assert result["latitude"] == 65.00 + assert result["longitude"] == 18.00 + assert result["country"] == "Iceland" + assert result["neighborhood"] == "Flatbush" + assert result["locality"] == "New York" + \ No newline at end of file From 4d0a2ac0eed5db90a0bf67a3f39e3ce04218d325 Mon Sep 17 00:00:00 2001 From: Shelby Potts Date: Sun, 23 Feb 2025 07:13:24 -0600 Subject: [PATCH 2/2] fixed broken tests --- tests/analytics/test_UserAnalytics.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/analytics/test_UserAnalytics.py b/tests/analytics/test_UserAnalytics.py index 06d3a15..0667206 100644 --- a/tests/analytics/test_UserAnalytics.py +++ b/tests/analytics/test_UserAnalytics.py @@ -38,8 +38,14 @@ "job_title": "Astronaut", "education_attained": "Undergraduate", "languages_spoken": 
"English", + "ethnicities": "Prefer Not to Say", "pets": "Dog", - "politics": "Prefer Not to Say" + "politics": "Prefer Not to Say", + "religions": "Prefer Not to Say", + "hometowns": "moon", + "relationship_types": "Prefer Not to Say", + "dating_intention": "Prefer Not to Say", + "workplaces": "Space" }, "preferences": { "distance_miles_max": 50, @@ -50,8 +56,9 @@ "latitude": 65.00, "longitude": 18.00, "country": "Iceland", + "locality": "New York", "sublocality": "Brooklyn", - "neighborhood": "Flatbush", + "neighborhood": "Flatbush" } } '''