diff --git a/.env_example b/.env_example index 23cd904..5f5aa38 100644 --- a/.env_example +++ b/.env_example @@ -1,4 +1,5 @@ HOST=0.0.0.0 PORT=8050 USER_FILE_PATH='file/path/user.json' -ASSETS_PATH='app/assets/' \ No newline at end of file +ASSETS_PATH='app/assets/' +GEOLITE_DB_PATH='data/GeoLite2-City.mmdb' \ No newline at end of file diff --git a/README.md b/README.md index 3a1b81c..ed0090b 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,13 @@ Remove The list of Matches provided by Hinge leaves a lot to be desired, which is why I decided to build this project analyzing and visualizing interesting insights from the Hinge data export. ## How To Run The App + +### Setting Up GeoLite2 Database +1. Create a free MaxMind account: [MaxMind Signup](https://www.maxmind.com/en/geolite2/signup) +2. Download **GeoLite2-City.mmdb** from [MaxMind](https://www.maxmind.com/en/accounts/current/downloads) +3. Place `GeoLite2-City.mmdb` in the project `data` directory, or set `GEOLITE_DB_PATH` in your `.env` file to point to its location. + + The application is a multi page Dash Plotly application that runs in a Docker container on port `8050`. Create a Docker build image with: `docker compose build` and run the app with: `docker compose up -d`. The app will be available at [http://0.0.0.0:8050/](http://0.0.0.0:8050/). To bring the container down, use `docker compose down`. The page will render with information about the app and instructions on how to use it. 
def collect_location_from_ip(self):
    """Resolve every recorded device IP address to a city-level location.

    The GeoLite2 database and the Nominatim geocoder are created once and
    shared across all lookups, and each IP address is looked up exactly once.

    Returns:
        pandas.DataFrame: one row per IP address that could be resolved, with
        the columns ``ip``, ``city``, ``region``, ``country``, ``latitude``
        and ``longitude``. The columns are present even when no row is, so
        callers can reference them without checking for an empty frame.

    Raises:
        Exception: if there are IP addresses to resolve but the
            GEOLITE_DB_PATH environment variable was not set.
    """
    columns = ["ip", "city", "region", "country", "latitude", "longitude"]
    ip_addresses = [device["ip_address"] for device in self.get_devices_data()]

    # Nothing to look up: skip opening the database entirely.
    if not ip_addresses:
        return pd.DataFrame(columns=columns)

    if not self.geo_lite_db_path:
        raise Exception("GEOLITE_DB_PATH environment variable is not set.")

    geolocator = Nominatim(user_agent="geoip_mapper")
    # The context manager guarantees the database file handle is released.
    with geoip2.database.Reader(self.geo_lite_db_path) as reader:
        lookups = [self._get_city_info(ip, reader, geolocator) for ip in ip_addresses]

    rows = [info for info in lookups if info is not None]
    return pd.DataFrame(rows, columns=columns)


def _get_city_info(self, ip, reader=None, geolocator=None):
    """Look up one IP address and geocode the place it maps to.

    Args:
        ip: IP address string to resolve.
        reader: optional open ``geoip2.database.Reader`` to reuse. When
            omitted, a reader is opened on ``self.geo_lite_db_path`` and
            closed again before returning.
        geolocator: optional geopy geocoder to reuse. When omitted, a
            ``Nominatim`` instance is created.

    Returns:
        dict | None: a dict with the keys ``ip``, ``city``, ``region``,
        ``country``, ``latitude`` and ``longitude``, or ``None`` when the
        address is invalid, is not in the database, has no place name, or
        cannot be geocoded.
    """
    # Imported locally so the module's import block does not need to change.
    from geoip2.errors import AddressNotFoundError
    from geopy.exc import GeopyError

    if geolocator is None:
        geolocator = Nominatim(user_agent="geoip_mapper")

    owns_reader = reader is None
    if owns_reader:
        reader = geoip2.database.Reader(self.geo_lite_db_path)

    try:
        response = reader.city(ip)
        city = response.city.name
        region = response.subdivisions.most_specific.name
        country = response.country.name

        # Any of the names may be None; leave those out of the query instead
        # of sending the literal text "None" to the geocoder.
        query = ", ".join(part for part in (city, region, country) if part)
        if not query:
            return None

        location = geolocator.geocode(query)
    except (AddressNotFoundError, ValueError, GeopyError):
        # Private or unknown address, malformed IP string, or geocoder failure.
        return None
    finally:
        # Only close a reader this call opened; a shared one belongs to the caller.
        if owns_reader:
            reader.close()

    if not location:
        return None

    return {
        "ip": ip,
        "city": city,
        "region": region,
        "country": country,
        "latitude": location.latitude,
        "longitude": location.longitude,
    }
def geolocation():
    """Build the card showing where the user's devices connected from.

    Locations come from ``UserAnalytics().collect_location_from_ip()`` and
    are plotted on an orthographic (globe) projection. When no location
    could be resolved, the card shows a short notice instead of the graph,
    because ``px.scatter_geo`` raises on a frame without the
    ``latitude``/``longitude`` columns.

    Returns:
        dmc.Card: the card component to place in the page layout.
    """
    df = UserAnalytics().collect_location_from_ip()

    if df.empty:
        # Nothing to plot; keep the page rendering rather than failing.
        content = dmc.Text(
            "No location data could be resolved from the recorded IP addresses.",
            size="md",
        )
    else:
        fig = px.scatter_geo(
            df,
            lat="latitude",
            lon="longitude",
            text="city",
            hover_name="ip",
            hover_data=["region", "country"],
            projection="orthographic",  # renders the map as a globe
        )
        fig.update_geos(
            showland=True, landcolor="rgb(217, 217, 217)",
            showocean=True, oceancolor="rgb(204, 230, 255)",
            showcountries=True, countrycolor="rgb(255, 255, 255)",
        )
        content = dcc.Graph(figure=fig)

    return dmc.Card(
        children=[
            dmc.Space(h=10),
            dmc.Text("User Activity Across the Globe", weight=700, size="xl"),
            dmc.Space(h=10),
            dmc.Text("Where the user has logged onto the app based on the IP address collected from their device.", size="md"),
            dmc.Space(h=10),
            content,
        ],
        withBorder=True,
        shadow="sm",
        radius="md",
        style={"height": "520px"},
    )
narwhals==1.26.0 nest-asyncio==1.6.0 numpy==2.0.2 @@ -23,6 +34,8 @@ packaging==24.2 pandas==2.2.3 plotly==6.0.0 pluggy==1.5.0 +propcache==0.3.0 +psycopg2-binary==2.9.10 pytest==8.3.4 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 @@ -35,4 +48,5 @@ typing_extensions==4.12.2 tzdata==2025.1 urllib3==2.3.0 Werkzeug==3.0.6 +yarl==1.18.3 zipp==3.21.0 diff --git a/tests/analytics/test_UserAnalytics.py b/tests/analytics/test_UserAnalytics.py index df32ca7..15e0a18 100644 --- a/tests/analytics/test_UserAnalytics.py +++ b/tests/analytics/test_UserAnalytics.py @@ -17,6 +17,12 @@ "device_model": "unknown", "device_platform": "ios", "device_os_versions": "16.5.1" + }, + { + "ip_address": "130.279.438.00", + "device_model": "unknown", + "device_platform": "ios", + "device_os_versions": "16.5.1" } ], "account": { @@ -208,4 +214,9 @@ def test_count_displayed_attributes(user_analytics): def test_profile_preference_selections(user_analytics): profile, prefs = user_analytics.profile_preference_selections() assert len(profile) == len(prefs) - assert len(profile) == 10 \ No newline at end of file + assert len(profile) == 10 + +# TODO: this needs to be mocked out and better tests added +# def test_collect_location_from_ip(user_analytics): +# result = user_analytics.collect_location_from_ip() +# assert result is not None \ No newline at end of file