Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env_example
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
HOST=0.0.0.0
PORT=8050
USER_FILE_PATH='file/path/user.json'
ASSETS_PATH='app/assets/'
ASSETS_PATH='app/assets/'
GEOLITE_DB_PATH='data/GeoLite2-City.mmdb'
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ Remove
The list of Matches provided by Hinge leaves a lot to be desired, which is why I decided to build this project analyzing and visualizing interesting insights from the Hinge data export.

## How To Run The App

### Setting Up GeoLite2 Database
1. Create a free MaxMind account: [MaxMind Signup](https://www.maxmind.com/en/geolite2/signup)
2. Download **GeoLite2-City.mmdb** from [MaxMind](https://www.maxmind.com/en/accounts/current/downloads)
3. Place `GeoLite2-City.mmdb` in the project's `data/` directory, or set `GEOLITE_DB_PATH` in your `.env` file to point to its location.


The application is a multi-page Plotly Dash application that runs in a Docker container on port `8050`. Build the Docker image with `docker compose build` and start the app with `docker compose up -d`. The app will be available at [http://0.0.0.0:8050/](http://0.0.0.0:8050/). To bring the container down, use `docker compose down`.

The page will render with information about the app and instructions on how to use it.
Expand Down
37 changes: 37 additions & 0 deletions app/analytics/UserAnalytics.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from datetime import datetime
from collections import defaultdict
import geoip2.database
from geopy.geocoders import Nominatim
import pandas as pd
import json
import os

class UserAnalytics:
def __init__(self):
self.assets_path = os.environ.get("ASSETS_PATH")
self.user_file_path = os.environ.get("USER_FILE_PATH")
self.geo_lite_db_path = os.environ.get("GEOLITE_DB_PATH")
if self.user_file_path is None:
raise Exception("USER_FILE_PATH environment variable is not set.")

Expand Down Expand Up @@ -120,6 +124,39 @@ def count_displayed_attributes(self):
display_counts[category]["true" if display_value else "false"] += 1
return dict(display_counts)

def collect_location_from_ip(self):
    """Build a table of geolocation records for the user's device IPs.

    Reads the device records from ``self.get_devices_data()``, looks up the
    ``ip_address`` of each one with ``self._get_city_info`` and keeps only
    the lookups that produced a result.

    Returns:
        pandas.DataFrame: one row per resolved IP address, built from the
        dicts returned by ``_get_city_info``. The frame is empty (and has
        no columns) when no address resolves.
    """
    device_data = self.get_devices_data()
    ip_addresses = [device["ip_address"] for device in device_data]

    # Look each address up exactly once. A lookup opens the GeoLite2
    # database and queries a remote geocoder, so calling it once for the
    # filter and again for the value would double that cost.
    lookups = (self._get_city_info(ip) for ip in ip_addresses)
    geolocation_data = [info for info in lookups if info is not None]

    return pd.DataFrame(geolocation_data)

def _get_city_info(self, ip):
    """Resolve an IP address to a place name and its coordinates.

    The city, region and country names come from the GeoLite2 database at
    ``self.geo_lite_db_path``; the latitude and longitude come from
    geocoding that place name with Nominatim.

    Args:
        ip: IP address string to look up.

    Returns:
        dict | None: a dict with the keys ``ip``, ``city``, ``region``,
        ``country``, ``latitude`` and ``longitude``, or ``None`` when the
        address cannot be resolved or the geocoder finds no match.

    Raises:
        Any error raised while opening the GeoLite2 database (for example
        a missing file) propagates, since that is a configuration problem
        rather than a bad address.
    """
    # The context manager closes the database file once the lookup is
    # done; without it every call would leave a file handle open.
    with geoip2.database.Reader(self.geo_lite_db_path) as reader:
        geolocator = Nominatim(user_agent="geoip_mapper")
        try:
            response = reader.city(ip)
            city = response.city.name
            region = response.subdivisions.most_specific.name
            country = response.country.name

            # Only geocode the parts the database actually knows; a
            # missing value would otherwise end up in the query as the
            # literal text "None".
            place_parts = [part for part in (city, region, country) if part]
            if not place_parts:
                return None

            # get latitude & longitude
            location = geolocator.geocode(", ".join(place_parts))
        except Exception:
            # Best-effort lookup: invalid or private IPs and geocoder
            # failures are skipped. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit through.
            return None

    if not location:
        return None

    return {
        "ip": ip,
        "city": city,
        "region": region,
        "country": country,
        "latitude": location.latitude,
        "longitude": location.longitude,
    }


def _convert_height(cm):
inches = cm / 2.54
Expand Down
33 changes: 33 additions & 0 deletions app/pages/UserPage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,38 @@

from analytics.UserAnalytics import UserAnalytics

def geolocation():
    """Build the card that maps where the user's devices connected from.

    Collects the geolocation table from
    ``UserAnalytics().collect_location_from_ip()`` and plots one marker per
    row on an orthographic (globe) projection. When the table is empty the
    card shows a short message instead of the globe.

    Returns:
        dmc.Card: the card holding the title, description and either the
        globe figure or the "no data" message.
    """
    df = UserAnalytics().collect_location_from_ip()

    if df.empty:
        # An empty frame has no "latitude"/"longitude" columns for
        # scatter_geo to plot, so show a message instead of a figure.
        body = dmc.Text("No location data could be resolved from the device IP addresses.", size="md")
    else:
        fig = px.scatter_geo(
            df,
            lat="latitude",
            lon="longitude",
            text="city",
            hover_name="ip",
            hover_data=["region", "country"],
            projection="orthographic"  # this makes it a globe
        )

        fig.update_geos(
            showland=True, landcolor="rgb(217, 217, 217)",  # customize land color
            showocean=True, oceancolor="rgb(204, 230, 255)",  # customize ocean color
            showcountries=True, countrycolor="rgb(255, 255, 255)"  # show country borders
        )
        body = dcc.Graph(figure=fig)

    return dmc.Card(
        children=[
            dmc.Space(h=10),
            dmc.Text("User Activity Across the Globe", weight=700, size="xl"),
            dmc.Space(h=10),
            dmc.Text("Where the user has logged onto the app based on the IP address collected from their device.", size="md"),
            dmc.Space(h=10),
            body
        ],
        withBorder=True,
        shadow="sm",
        radius="md",
        style={"height": "520px"},
    )

def potential_misalignments():
# define categories
categories = ["Religion", "Ethnicity", "Smoking", "Drinking", "Marijuana", "Drugs", "Children", "Family Plans", "Education", "Politics"]
Expand Down Expand Up @@ -204,5 +236,6 @@ def create_user_summary_card():
dmc.Space(h=120),
disclosure_vs_privacy(),
potential_misalignments(),
geolocation(),
dmc.Space(h=50)
])
14 changes: 14 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
aiohappyeyeballs==2.5.0
aiohttp==3.11.13
aiosignal==1.3.2
async-timeout==5.0.1
attrs==25.1.0
blinker==1.9.0
certifi==2025.1.31
charset-normalizer==3.4.1
Expand All @@ -9,20 +14,28 @@ dash-mantine-components==0.12.1
dash-table==5.0.0
exceptiongroup==1.2.2
Flask==3.0.3
frozenlist==1.5.0
geographiclib==2.0
geoip2==5.0.1
geopy==2.4.1
idna==3.10
importlib_metadata==8.6.1
iniconfig==2.0.0
itsdangerous==2.2.0
Jinja2==3.1.5
loguru==0.7.3
MarkupSafe==3.0.2
maxminddb==2.6.3
multidict==6.1.0
narwhals==1.26.0
nest-asyncio==1.6.0
numpy==2.0.2
packaging==24.2
pandas==2.2.3
plotly==6.0.0
pluggy==1.5.0
propcache==0.3.0
psycopg2-binary==2.9.10
pytest==8.3.4
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
Expand All @@ -35,4 +48,5 @@ typing_extensions==4.12.2
tzdata==2025.1
urllib3==2.3.0
Werkzeug==3.0.6
yarl==1.18.3
zipp==3.21.0
13 changes: 12 additions & 1 deletion tests/analytics/test_UserAnalytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
"device_model": "unknown",
"device_platform": "ios",
"device_os_versions": "16.5.1"
},
{
"ip_address": "130.279.438.00",
"device_model": "unknown",
"device_platform": "ios",
"device_os_versions": "16.5.1"
}
],
"account": {
Expand Down Expand Up @@ -208,4 +214,9 @@ def test_count_displayed_attributes(user_analytics):
def test_profile_preference_selections(user_analytics):
    """Profile and preference selections have matching lengths of 10."""
    profile, prefs = user_analytics.profile_preference_selections()
    # Both sequences must line up one-to-one, and the fixture data is
    # expected to yield 10 entries.
    assert len(profile) == len(prefs)
    assert len(profile) == 10

# TODO: this needs to be mocked out and better tests added
# def test_collect_location_from_ip(user_analytics):
# result = user_analytics.collect_location_from_ip()
# assert result is not None