Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env_example
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
HOST=0.0.0.0
PORT=8050
USER_FILE_PATH='file/path/user.json'
USER_FILE_PATH='file/path/user.json'
ASSETS_PATH='app/assets/'
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ data/
./.pytest_cache
.env
.vscode
.pytest_cache
.pytest_cache
app/assets/
116 changes: 80 additions & 36 deletions app/analytics/UserAnalytics.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# from ip2geotools.databases.noncommercial import DbIpCity
from datetime import datetime
import json
import pandas as pd
import os

class UserAnalytics:
def __init__(self):
self.assets_path = os.environ.get("ASSETS_PATH")
self.user_file_path = os.environ.get("USER_FILE_PATH")
if self.user_file_path is None:
raise Exception("USER_FILE_PATH environment variable is not set.")
Expand All @@ -16,6 +16,11 @@ def __init__(self):
user_data = json.load(file)

self.user_data = user_data

def get_media_file_paths(self):
    """
    Lists the JPEG files found directly inside the configured assets directory.

    :return: a sorted list of file names (not full paths) that end in ".jpg"
    :raises Exception: if no assets path has been configured
    """
    assets_dir = self.assets_path

    # os.listdir(None) silently lists the current working directory, so an unset
    # assets path has to be rejected explicitly instead of returning unrelated files
    if assets_dir is None:
        raise Exception("ASSETS_PATH environment variable is not set.")

    # os.listdir returns entries in arbitrary order; sort so callers get a stable listing
    return sorted(f for f in os.listdir(assets_dir) if f.endswith(".jpg"))

def get_account_data(self):
    """
    Looks up the "account" section of the loaded user data.

    :return: the value stored under the "account" key of self.user_data
    """
    account_section = self.user_data["account"]
    return account_section
Expand All @@ -32,38 +37,77 @@ def get_preferences_data(self):
def get_location_data(self):
    """
    Looks up the "location" section of the loaded user data.

    :return: the value stored under the "location" key of self.user_data
    """
    location_section = self.user_data["location"]
    return location_section

def build_user_location_dict(self):
    """
    Builds a flat dictionary describing the user's location.

    :return: a dict with the keys city, latitude, longitude, country, neighborhood and locality
    """
    raw_location = self.get_location_data()

    # (output key, key in the raw location record); note "city" is read from "sublocality"
    field_map = (
        ("city", "sublocality"),
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("country", "country"),
        ("neighborhood", "neighborhood"),
        ("locality", "locality"),
    )

    return {out_key: raw_location[raw_key] for out_key, raw_key in field_map}

def build_user_summary_dict(self):
    """
    Builds a flat summary of the user from the profile and account sections.

    Profile fields are copied as-is, the height is converted from centimeters to
    feet and inches, and two durations (in days) are derived from account timestamps.

    :return: a dict of profile fields plus height_feet, height_inches,
             last_pause_duration and on_app_duration
    """
    profile = self.get_profile_data()
    account = self.get_account_data()

    summary = {
        "first_name": profile["first_name"],
        "age": profile["age"],
    }

    # the export stores height in centimeters; expose it as feet and inches instead
    summary["height_feet"], summary["height_inches"] = _convert_height(profile["height_centimeters"])

    # remaining profile fields are copied over under the same key
    copied_fields = (
        "gender",
        "ethnicities",
        "religions",
        "job_title",
        "workplaces",
        "education_attained",
        "hometowns",
        "languages_spoken",
        "politics",
        "pets",
        "relationship_types",
        "dating_intention",
    )
    for field in copied_fields:
        summary[field] = profile[field]

    # days between the last pause and the last unpause
    summary["last_pause_duration"] = _timestamp_durations(
        leading_timestamp=account["last_unpause_time"],
        lagging_timestamp=account["last_pause_time"],
    )

    # days between signup and the last time the user was seen; signup_time carries microseconds
    summary["on_app_duration"] = _timestamp_durations(
        leading_timestamp=account["last_seen"],
        lagging_timestamp=account["signup_time"],
        lag_has_microseconds=True,
    )

    return summary

def _convert_height(cm):
inches = cm / 2.54
feet = int(inches // 12) # whole feet
# remaining inches, rounded to 1 decimal place
remaining_inches = round(inches % 12, 1)

return feet, remaining_inches

def _timestamp_durations(leading_timestamp, lagging_timestamp, lag_has_microseconds=False):
lead_dt_format = "%Y-%m-%d %H:%M:%S"
lag_dt_format = lead_dt_format

# the signup_time contains microseconds, so this handles that special format
if lag_has_microseconds:
lag_dt_format = "%Y-%m-%d %H:%M:%S.%f"

# parse timestamps
lag_time = datetime.strptime(lagging_timestamp, lag_dt_format)
lead_time = datetime.strptime(leading_timestamp, lead_dt_format)

# calculate difference in days
days_difference = (lead_time - lag_time).days

# def parse_user_ip_addresses(file_path='data/export/user.json'):
# """
# Parses the IP addresses out of the user data and gets latitude and longitude coordinates from the IP addresses.
# This is only grabbing a subset of the IP addresses because the full set of data takes too long.
# :return: a DataFrame with latitude and longitude coordinates
# """
# json_file_path = file_path

# # opening json file
# with open(json_file_path, 'r') as file:
# # raw data is a list of dictionaries "list of interactions with a person"
# raw_data = json.load(file)

# device_value = []
# # parse just the device records
# if 'devices' in raw_data:
# values = raw_data['devices']
# device_value = values

# # extract the IP addresses
# ip_addresses = [entry['ip_address'] for entry in device_value]

# lats = []
# longs = []
# # lookup the latitude and longitude coordinates of each IP address
# # TODO: this API call doesn't work super well, replace it
# # for ip in ip_addresses[:100]:
# # coord = DbIpCity.get(ip, api_key="free")
# # lats.append(coord.latitude)
# # longs.append(coord.longitude)

# # define column names and create a DataFrame
# coordinates = pd.DataFrame({'latitude': lats, 'longitude': longs})
# return coordinates
return days_difference
76 changes: 38 additions & 38 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,38 +79,38 @@
dmc.Space(h=30)]),

# section for uploading files
html.Div([
dmc.Text("Upload Files", style={"fontSize": 28}, weight=500),
dmc.Text("Upload the `matches.json` and the `user.json` files from the zipped Hinge export for analysis."),
dmc.Space(h=20),
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px',
"fontSize": 20,
'font-family': "Open Sans, verdana, arial, sans-serif"
},
# Allow multiple files to be uploaded
multiple=True
),
html.Div(id='output-data-upload')
]),
# html.Div([
# dmc.Text("Upload Files", style={"fontSize": 28}, weight=500),
# dmc.Text("Upload the `matches.json` and the `user.json` files from the zipped Hinge export for analysis."),
# dmc.Space(h=20),
# dcc.Upload(
# id='upload-data',
# children=html.Div([
# 'Drag and Drop or ',
# html.A('Select Files')
# ]),
# style={
# 'width': '100%',
# 'height': '60px',
# 'lineHeight': '60px',
# 'borderWidth': '1px',
# 'borderStyle': 'dashed',
# 'borderRadius': '5px',
# 'textAlign': 'center',
# 'margin': '10px',
# "fontSize": 20,
# 'font-family': "Open Sans, verdana, arial, sans-serif"
# },
# # Allow multiple files to be uploaded
# multiple=True
# ),
# html.Div(id='output-data-upload')
# ]),

# show links to the other pages
dmc.Text("Data Insights", style={"fontSize": 28}, weight=500),
dmc.Text("After uploading your data files, you can click on the page links below to see insights "
"from the data provided by Hinge."),
# dmc.Text("After uploading your data files, you can click on the page links below to see insights "
# "from the data provided by Hinge."),
dmc.Space(h=10),
html.Div([
html.Div(
Expand Down Expand Up @@ -146,19 +146,19 @@ def parse_uploaded_file_contents(list_of_file_contents, list_of_file_names):
])


@callback(Output('output-data-upload', 'children'),
          Input('upload-data', 'contents'),
          State('upload-data', 'filename'))
def update_output(list_of_contents, list_of_names):
    """
    Fills the 'output-data-upload' container from the 'upload-data' component.

    :param list_of_contents: the 'contents' property of the upload component
    :param list_of_names: the 'filename' property of the upload component
    :return: a one-element list holding the parsed upload result, or None when
             there are no contents
    """
    # no contents to parse
    if list_of_contents is None:
        return None

    parsed = parse_uploaded_file_contents(list_of_contents, list_of_names)
    return [parsed]
# @callback(Output('output-data-upload', 'children'),
# Input('upload-data', 'contents'),
# State('upload-data', 'filename'))
# def update_output(list_of_contents, list_of_names):
# if list_of_contents is not None:
# children = [
# parse_uploaded_file_contents(list_of_contents, list_of_names)]
# return children


if __name__ == '__main__':
    # bind address and port come from the environment; PORT falls back to 8050
    host = os.environ.get("HOST")
    port_setting = os.environ.get("PORT", 8050)
    port = int(port_setting)

    logger.info(f"Starting the Hinge Data Analysis app on {host}:{port}...")
    logger.info(f"Running the Hinge Data Analysis app on {host}:{port}...")

    # NOTE(review): debug=True is hard-coded here; confirm this entry point is
    # only used for local development before binding to a public HOST
    app.run(debug=True, host=host, port=port)
Loading