diff --git a/app/analytics/UserAnalytics.py b/app/analytics/UserAnalytics.py index 74a9e97..e5156fe 100644 --- a/app/analytics/UserAnalytics.py +++ b/app/analytics/UserAnalytics.py @@ -5,12 +5,14 @@ import pandas as pd import json import os +import shutil class UserAnalytics: def __init__(self): self.assets_path = os.environ.get("ASSETS_PATH") self.user_file_path = os.environ.get("USER_FILE_PATH") self.geo_lite_db_path = os.environ.get("GEOLITE_DB_PATH") + self.media_path = os.environ.get("MEDIA_PATH") # TODO: come back and fix this # if self.geo_lite_db_path is None: @@ -28,8 +30,10 @@ def __init__(self): self.user_data = user_data def get_media_file_paths(self): - assets_dir = self.assets_path - jpg_files = [f for f in os.listdir(assets_dir) if f.endswith(".jpg")] + # need to copy the files from the media_path to the assets_dir + _copy_files(self.media_path, self.assets_path) + + jpg_files = [f for f in os.listdir(self.assets_path) if f.endswith(".jpg")] return jpg_files def get_account_data(self): @@ -51,12 +55,12 @@ def build_user_location_dict(self): location = self.get_location_data() user_location = {} - user_location["city"] = location["sublocality"] + user_location["city"] = location["cbsa"].split(",")[0] user_location["latitude"] = location["latitude"] user_location["longitude"] = location["longitude"] - user_location["country"] = location["country"] + user_location["country"] = location["country_short"] user_location["neighborhood"] = location["neighborhood"] - user_location["locality"] = location["locality"] + user_location["locality"] = location["admin_area_1_short"] return user_location @@ -86,14 +90,17 @@ def build_user_summary_dict(self): user_summary["dating_intention"] = profile_data["dating_intention"] # capture duration paused and on app time - user_summary["last_pause_duration"] = _timestamp_durations( - leading_timestamp=account_data["last_unpause_time"], - lagging_timestamp=account_data["last_pause_time"]) + # the pause times are only present if the user has paused the app, so have to check their presence first + if "last_unpause_time" in account_data and "last_pause_time" in account_data: + user_summary["last_pause_duration"] = _timestamp_durations( + leading_timestamp=account_data["last_unpause_time"], + lagging_timestamp=account_data["last_pause_time"]) + else: + user_summary["last_pause_duration"] = 0 user_summary["on_app_duration"] = _timestamp_durations( leading_timestamp=account_data["last_seen"], - lagging_timestamp=account_data["signup_time"], - lag_has_microseconds=True) + lagging_timestamp=account_data["signup_time"]) return user_summary @@ -157,6 +164,18 @@ def collect_location_from_ip(self): geolocation_data = [self._get_city_info(ip) for ip in ip_addresses if self._get_city_info(ip) is not None] return pd.DataFrame(geolocation_data) + +def _copy_files(src_dir, dest_dir): + os.makedirs(dest_dir, exist_ok=True) + + # loop through all files in source directory + for file_name in os.listdir(src_dir): + src_path = os.path.join(src_dir, file_name) + dest_path = os.path.join(dest_dir, file_name) + + # only copy files (not subdirectories) + if os.path.isfile(src_path): + shutil.copy2(src_path, dest_path) # copy2 preserves metadata def _get_city_info(self, ip): # initialize GeoLite2 reader & geocoder @@ -192,13 +211,9 @@ def _convert_height(cm): return feet, remaining_inches -def _timestamp_durations(leading_timestamp, lagging_timestamp, lag_has_microseconds=False): - lead_dt_format = "%Y-%m-%d %H:%M:%S" - lag_dt_format = lead_dt_format - - # the signup_time contains microseconds, so this handles that special format - if lag_has_microseconds: - lag_dt_format = "%Y-%m-%d %H:%M:%S.%f" +def _timestamp_durations(leading_timestamp, lagging_timestamp): + lead_dt_format = "%Y-%m-%d %H:%M:%S.%f" + lag_dt_format = "%Y-%m-%d %H:%M:%S.%f" # parse timestamps lag_time = datetime.strptime(lagging_timestamp, lag_dt_format) diff --git a/app/app.py b/app/app.py index ac302b0..bb8a6e5 100644 --- a/app/app.py +++ b/app/app.py @@ -29,19 +29,16 @@ def get_additional_text(page_name): """Helper function to provide context about the different hyperlinks based on the page name.""" if page_name == "Info": - return "Discover detailed insights about the app's features and functionality. This section offers a comprehensive" \ - " overview of how the project works, what data is available to the user, and how to navigate the app for the best experience." + return "Discover detailed insights about the app's features and functionality." elif page_name == "Matches": - return "Explore in-depth analyses of the users matches and interactions. This section reveals patterns in the user's matching " \ - "behavior, preferences, and key factors that influence successful connections with potential matches." + return "Explore in-depth analyses of the users matches and interactions." elif page_name == "User": - return "Analyze the user's personal profile and preferences. This section contains insights into how the user's " \ - "profile is presented, the preferences they've set, and how their interactions shape their experience on the app." - + return "Analyze the user's personal profile and preferences." app.layout = html.Div([ - dmc.Title('Hinge Data Analysis', color="black", size="h1"), + dmc.Title('Hinge User Insights', align="center", color="black", size="h1"), + dmc.Space(h=10), + dmc.Text("Insights into a Hinge User's Experiences", align="center", style={"fontSize": 16}, weight=500, italic=True), dmc.Space(h=20), - dmc.Text("Insights into a Hinge User's Experiences", style={"fontSize": 24}, weight=500), dmc.Text("This project analyzes personal data exported from Hinge to provide valuable insights into the user's " "experiences on the platform. By examining the user's profile, dating preferences, and interactions with other users, " "the project aims to reveal patterns, trends, and meaningful statistics that enhance the understanding of how users " @@ -75,9 +72,6 @@ def get_additional_text(page_name): ]) - - - if __name__ == '__main__': host = os.environ.get("HOST") port = int(os.environ.get("PORT", 8050)) diff --git a/app/pages/InfoPage.py b/app/pages/InfoPage.py index ba260da..6b80601 100644 --- a/app/pages/InfoPage.py +++ b/app/pages/InfoPage.py @@ -2,6 +2,9 @@ import dash_mantine_components as dmc layout = html.Div([ + dmc.Text("Info", align="center", style={"fontSize": 28}, weight=500), + dmc.Text("This section offers a comprehensive overview of how the project works, what data is available to the user, and how to navigate the app for the best experience."), + dmc.Space(h=20), dmc.Space(h=10), dmc.Text("What This Is", style={"fontSize": 28}, weight=500), dmc.Text("This application is meant to help provide meaningful insights about interactions users had with " diff --git a/app/pages/MatchPage.py b/app/pages/MatchPage.py index 1c5f613..83c0fc3 100644 --- a/app/pages/MatchPage.py +++ b/app/pages/MatchPage.py @@ -12,12 +12,12 @@ def serve_layout(): return html.Div([ + # TODO: need to remove the button, but it has dependencies with the charts so will leave it until I can redo the charts html.Button('Reload Graphs', id='refresh-page', style={"fontSize": 16, 'font-family': "Open Sans, verdana, arial, sans-serif"}), dmc.Space(h=20), dmc.Text("Match Analytics", align="center", style={"fontSize": 28}, weight=500), - dmc.Text("This section contains insights about the interactions (likes, matches, chats, and unmatches) you've " - "had on Hinge."), + dmc.Text("This section reveals patterns in the user's matching behavior, preferences, and key factors that influence successful connections with potential matches."), dmc.Space(h=20), # funnel graph showing breakdown of interactions diff --git a/app/pages/UserPage.py b/app/pages/UserPage.py index c705529..605c80a 100644 --- a/app/pages/UserPage.py +++ b/app/pages/UserPage.py @@ -187,7 +187,7 @@ def user_photo_slideshow(): return dmc.Card( children=[ - dmc.Text("Photos", weight=750, size="lg"), + dmc.Text("User Uploaded Photos", align="center", weight=750, size="xl"), dmc.Space(h=10), html.Img(id="slideshow-image", style={"width": "100%", "borderRadius": "10px"}), # Image placeholder dcc.Interval(id="interval-component", interval=10000, n_intervals=0), @@ -228,7 +228,7 @@ def create_user_location_card(): return dmc.Card( children=[ dmc.Space(h=10), - dmc.Text("Location", weight=700, size="xl"), + dmc.Text("User Location", weight=700, align="center", size="xl"), dmc.Text(f"Country: {user_location['country']}", size="lg"), dmc.Text(f"Locality: {user_location['locality']}", size="lg"), dmc.Text(f"City: {user_location['city']}", size="lg"), @@ -248,10 +248,10 @@ def create_user_summary_card(): return dmc.Card( children=[ - dmc.Text(f"{user_summary['first_name']}", weight=750, size="xl"), + dmc.Text(f"{user_summary['first_name']}", align="center", weight=750, size="xl"), dmc.Text(f"Age: {user_summary['age']}", size="lg", color="dimmed"), - dmc.Text(f"Height (ft, in): {user_summary['height_feet'], user_summary['height_inches']}", size="lg", color="dimmed"), - dmc.Text(f"Gender: {user_summary['gender']}", size="lg", color="dimmed"), + dmc.Text(f"Height (ft, in): {user_summary['height_feet'], user_summary['height_inches']}", size="lg"), + dmc.Text(f"Gender: {user_summary['gender']}", size="lg"), dmc.Text(f"Ethnicities: {user_summary['ethnicities']}", size="lg"), dmc.Text(f"Religions: {user_summary['religions']}", size="lg"), dmc.Text(f"Job: {user_summary['job_title']}", size="lg"), @@ -273,6 +273,8 @@ def create_user_summary_card(): layout = html.Div([ dmc.Text("User Analytics", align="center", style={"fontSize": 28}, weight=500), + dmc.Space(h=10), + dmc.Text("This section contains insights into how the user's profile is presented, the preferences they've set, and how their interactions shape their experience on the app."), dmc.Space(h=20), dmc.Grid( children=[ @@ -293,7 +295,7 @@ def create_user_summary_card(): dmc.Space(h=120), disclosure_vs_privacy(), potential_misalignments(), - geolocation(), + # geolocation(), # TODO: this is causing issues with too many lookup calls stringency_vs_flexibility(), dmc.Space(h=50) ]) diff --git a/tests/analytics/test_UserAnalytics.py b/tests/analytics/test_UserAnalytics.py index df23e2b..07cda14 100644 --- a/tests/analytics/test_UserAnalytics.py +++ b/tests/analytics/test_UserAnalytics.py @@ -27,9 +27,9 @@ ], "account": { "signup_time": "2024-01-01 03:27:17.539", - "last_pause_time": "2020-09-04 03:04:32", - "last_unpause_time": "2020-09-10 16:53:40", - "last_seen": "2024-01-17 04:07:39", + "last_pause_time": "2020-09-04 03:04:32.765", + "last_unpause_time": "2020-09-10 16:53:40.324", + "last_seen": "2024-01-17 04:07:39.234", "device_platform": "ios", "device_os": "16.6.1", "device_model": "unknown", @@ -109,9 +109,9 @@ "location": { "latitude": 65.00, "longitude": 18.00, - "country": "Iceland", - "locality": "New York", - "sublocality": "Brooklyn", + "country_short": "US", + "admin_area_1_short": "NY", + "cbsa": "Brooklyn", "neighborhood": "Flatbush" } } @@ -186,7 +186,7 @@ def test_location_data(user_analytics): locations = user_analytics.get_location_data() assert locations["latitude"] == 65.00 assert locations["longitude"] == 18.00 - assert locations["country"] == "Iceland" + assert locations["country_short"] == "US" def test_build_user_summary_dict(user_analytics): result = user_analytics.build_user_summary_dict() @@ -208,9 +208,9 @@ def test_build_user_location_dict(user_analytics): assert result["city"] == "Brooklyn" assert result["latitude"] == 65.00 assert result["longitude"] == 18.00 - assert result["country"] == "Iceland" + assert result["country"] == "US" assert result["neighborhood"] == "Flatbush" - assert result["locality"] == "New York" + assert result["locality"] == "NY" def test_count_displayed_attributes(user_analytics): result = user_analytics.count_displayed_attributes()