diff --git a/Dockerfile b/Dockerfile index 79a8ee1..4f784de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,4 +17,4 @@ COPY . . EXPOSE 8050 # Command to run the application -CMD ["python", "app.py"] \ No newline at end of file +CMD ["python", "app/app.py"] \ No newline at end of file diff --git a/__init__.py b/app/__init__.py similarity index 100% rename from __init__.py rename to app/__init__.py diff --git a/analytics.py b/app/analytics/MatchAnalytics.py similarity index 100% rename from analytics.py rename to app/analytics/MatchAnalytics.py diff --git a/user_analytics.py b/app/analytics/UserAnalytics.py similarity index 82% rename from user_analytics.py rename to app/analytics/UserAnalytics.py index 3fcd381..7593ecd 100644 --- a/user_analytics.py +++ b/app/analytics/UserAnalytics.py @@ -1,4 +1,4 @@ -from ip2geotools.databases.noncommercial import DbIpCity +# from ip2geotools.databases.noncommercial import DbIpCity import json import pandas as pd @@ -28,10 +28,11 @@ def parse_user_ip_addresses(file_path='data/export/user.json'): lats = [] longs = [] # lookup the latitude and longitude coordinates of each IP address - for ip in ip_addresses[:100]: - coord = DbIpCity.get(ip, api_key="free") - lats.append(coord.latitude) - longs.append(coord.longitude) + # TODO: this API call doesn't work super well, replace it + # for ip in ip_addresses[:100]: + # coord = DbIpCity.get(ip, api_key="free") + # lats.append(coord.latitude) + # longs.append(coord.longitude) # define column names and create a DataFrame coordinates = pd.DataFrame({'latitude': lats, 'longitude': longs}) diff --git a/app/analytics/__init__.py b/app/analytics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app.py b/app/app.py similarity index 94% rename from app.py rename to app/app.py index 08cf0dd..cf92336 100644 --- a/app.py +++ b/app/app.py @@ -11,9 +11,11 @@ import os import base64 -import pages.matches as matches -import pages.user as user -import pages.home as home +import pages.MatchPage as MatchPage +import pages.UserPage as UserPage +import pages.HomePage as HomePage + +from tools.Logger import logger USER_FILE_UPLOAD_DIRECTORY = "../data/app_uploaded_files" @@ -21,9 +23,9 @@ server = Flask(__name__) app = Dash(__name__, server=server, use_pages=True, external_stylesheets=external_stylesheets) -dash.register_page("home", path='/', layout=home.layout) -dash.register_page("matches", path='/matches', layout=matches.layout) -dash.register_page("user", path='/user', layout=user.layout) +dash.register_page("home", path='/', layout=HomePage.layout) +dash.register_page("matches", path='/matches', layout=MatchPage.layout) +dash.register_page("user", path='/user', layout=UserPage.layout) app.layout = html.Div([ dmc.Title('Hinge Data Analysis', color="black", size="h1"), @@ -153,4 +155,5 @@ def update_output(list_of_contents, list_of_names): if __name__ == '__main__': + logger.info("Starting the Dash Plotly app...") app.run(debug=True, host='0.0.0.0', port=8050) \ No newline at end of file diff --git a/pages/home.py b/app/pages/HomePage.py similarity index 100% rename from pages/home.py rename to app/pages/HomePage.py diff --git a/pages/matches.py b/app/pages/MatchPage.py similarity index 83% rename from pages/matches.py rename to app/pages/MatchPage.py index 8b3fbfc..1c5f613 100644 --- a/pages/matches.py +++ b/app/pages/MatchPage.py @@ -4,7 +4,7 @@ import plotly.express as px from dash.exceptions import PreventUpdate -import analytics +import analytics.MatchAnalytics as ma global normalized_events @@ -80,8 +80,8 @@ def serve_layout(): def update_graph_live(data): __check_for_live_update_data(data) __setup_global_norm_events() - return px.funnel(analytics.total_counts(normalized_events), x=analytics.total_counts(normalized_events)["count"], - y=analytics.total_counts(normalized_events)["action_type"], + return px.funnel(ma.total_counts(normalized_events), x=ma.total_counts(normalized_events)["count"], + y=ma.total_counts(normalized_events)["action_type"], labels={'y': 'interaction count'}) @@ -92,7 +92,7 @@ def update_graph_live(data): def update_double_likes_pie(data): __check_for_live_update_data(data) __setup_global_norm_events() - return px.pie(analytics.analyze_double_likes(normalized_events), values="Count", names="Like Frequency", + return px.pie(ma.analyze_double_likes(normalized_events), values="Count", names="Like Frequency", title="Number of Outgoing Likes per Person") @@ -103,7 +103,7 @@ def update_double_likes_pie(data): def update_commented_likes_pie(data): __check_for_live_update_data(data) __setup_global_norm_events() - return px.pie(analytics.like_comment_ratios(normalized_events), values="Count", names="Likes With/ Without Comments", + return px.pie(ma.like_comment_ratios(normalized_events), values="Count", names="Likes With/ Without Comments", title="Outgoing Likes with Comments") @@ -114,10 +114,10 @@ def update_commented_likes_pie(data): def update_action_types_graph(data): __check_for_live_update_data(data) __setup_global_norm_events() - return px.line(analytics.activity_by_date(normalized_events), - x=analytics.activity_by_date(normalized_events)['activity_date'], - y=analytics.activity_by_date(normalized_events)['count'], - color=analytics.activity_by_date(normalized_events)['type'], + return px.line(ma.activity_by_date(normalized_events), + x=ma.activity_by_date(normalized_events)['activity_date'], + y=ma.activity_by_date(normalized_events)['count'], + color=ma.activity_by_date(normalized_events)['type'], labels={'x': 'activity_date', 'y': 'count'}) @@ -128,7 +128,7 @@ def update_action_types_graph(data): def update_number_shares_graph(data): __check_for_live_update_data(data) __setup_global_norm_events() - return px.pie(analytics.phone_number_shares(normalized_events), values="Count", names="Message Outcomes") + return px.pie(ma.phone_number_shares(normalized_events), values="Count", names="Message Outcomes") @callback( @@ -138,7 +138,7 @@ def update_number_shares_graph(data): def update_messages_per_chat_graph(data): __check_for_live_update_data(data) __setup_global_norm_events() - return px.histogram(analytics.date_count_distribution(normalized_events), x='outgoing_messages', nbins=50).update_layout(bargap=0.2) + return px.histogram(ma.date_count_distribution(normalized_events), x='outgoing_messages', nbins=50).update_layout(bargap=0.2) @callback( @@ -148,7 +148,7 @@ def update_messages_per_chat_graph(data): def update_comment_table(data): __check_for_live_update_data(data) __setup_global_norm_events() - commented_outgoing_likes_data = analytics.commented_outgoing_likes(normalized_events).to_dict('records') + commented_outgoing_likes_data = ma.commented_outgoing_likes(normalized_events).to_dict('records') return [ dash_table.DataTable(data=commented_outgoing_likes_data, page_size=10, style_cell={'textAlign': 'left'}) @@ -160,7 +160,7 @@ def update_comment_table(data): def __setup_global_norm_events(file_path="../data/app_uploaded_files/matches.json"): global normalized_events - normalized_events = analytics.prepare_uploaded_match_data(file_path) + normalized_events = ma.prepare_uploaded_match_data(file_path) def __check_for_live_update_data(data): diff --git a/pages/user.py b/app/pages/UserPage.py similarity index 94% rename from pages/user.py rename to app/pages/UserPage.py index d60629a..0c01b04 100644 --- a/pages/user.py +++ b/app/pages/UserPage.py @@ -4,8 +4,8 @@ import plotly.express as px from dash.exceptions import PreventUpdate -import analytics -import user_analytics as ua +import analytics.MatchAnalytics as MatchAnalytics +import analytics.UserAnalytics as ua layout = html.Div([ @@ -45,7 +45,7 @@ def update_comment_table(data): __check_for_live_update_data(data) - account_data = analytics.import_user_account_data() + account_data = MatchAnalytics.import_user_account_data() # passing in the account data as a list for the DataTable return [ dash_table.DataTable(data=[account_data], page_size=5, diff --git a/app/pages/__init__.py b/app/pages/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/tools/Logger.py b/app/tools/Logger.py new file mode 100644 index 0000000..4002e57 --- /dev/null +++ b/app/tools/Logger.py @@ -0,0 +1,10 @@ +from loguru import logger +import sys + +# remove default loguru logger +logger.remove() + +logger.add( + sys.stderr, + format="{time} | {level} | {message} | {file}:{line} | {function}", + level="DEBUG") diff --git a/app/tools/__init__.py b/app/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/data_utility.py b/app/utilities/DataUtility.py similarity index 100% rename from data_utility.py rename to app/utilities/DataUtility.py diff --git a/app/utilities/__init__.py b/app/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt index 31e5363..4f86ebc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,140 +1,32 @@ -aiohttp==3.9.1 -aiosignal==1.3.1 -ansi2html==1.9.1 -appnope==0.1.4 -astroid==3.0.2 -asttokens==2.4.1 -async-timeout==4.0.3 -attrs==23.2.0 -autopep8==2.0.4 -backcall==0.2.0 -beautifulsoup4==4.12.3 -bleach==6.1.0 -blinker==1.7.0 -certifi==2023.11.17 -chardet==5.2.0 -charset-normalizer==3.3.2 -click==8.1.7 -cssselect==1.2.0 -dash==2.14.2 +blinker==1.9.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +click==8.1.8 +dash==2.18.2 dash-core-components==2.0.0 dash-html-components==2.0.0 dash-mantine-components==0.12.1 dash-table==5.0.0 -decorator==5.1.1 -defusedxml==0.7.1 -dicttoxml==1.7.16 -dill==0.3.7 -docopt==0.6.2 -docutils==0.20.1 -exceptiongroup==1.2.0 -executing==2.0.1 -fastjsonschema==2.19.1 -Flask==3.0.0 -frozenlist==1.4.1 -future==0.18.3 -geocoder==1.38.1 -geoip2==4.8.0 -h11==0.14.0 -idna==3.6 -importlib-metadata==7.0.1 -iniconfig==2.0.0 -ip2geotools==0.1.6 -IP2Location==8.10.2 -ipython==8.12.3 -isort==5.13.2 -itsdangerous==2.1.2 -jaraco.classes==3.3.0 -jedi==0.19.1 -Jinja2==3.1.3 -jsonschema==4.22.0 -jsonschema-specifications==2023.12.1 -jupyter_client==8.6.2 -jupyter_core==5.7.2 -jupyterlab_pygments==0.3.0 -keyring==24.3.0 -lazy-object-proxy==1.10.0 -lxml==5.1.0 -markdown-it-py==3.0.0 -MarkupSafe==2.1.3 -matplotlib-inline==0.1.7 -maxminddb==2.5.2 -mccabe==0.7.0 -mdurl==0.1.2 -mistune==3.0.2 -more-itertools==10.2.0 -multidict==6.0.4 -nbclient==0.10.0 -nbconvert==7.16.4 -nbformat==5.10.4 -nest-asyncio==1.5.9 -nh3==0.2.15 -numpy==1.26.3 -outcome==1.3.0.post0 -packaging==23.2 -pandas==2.1.4 -pandocfilters==1.5.1 -parso==0.8.4 -patsy==0.5.6 -pexpect==4.9.0 -pickleshare==0.7.5 -pip-review==1.3.0 -pipreqs==0.5.0 -pkginfo==1.9.6 -platformdirs==4.1.0 -plotly==5.18.0 -plotly-express==0.4.1 -pluggy==1.4.0 -prompt-toolkit==3.0.43 -ptyprocess==0.7.0 -pure-eval==0.2.2 -pycodestyle==2.11.1 -Pygments==2.17.2 -pylint==3.0.3 -pyparsing==3.1.1 -pyquery==2.0.0 -PySocks==1.7.1 -pytest==7.4.4 -python-dateutil==2.8.2 -pytz==2023.3.post1 -pyzmq==26.0.3 -ratelim==0.1.6 -readme-renderer==42.0 -referencing==0.35.1 -requests==2.31.0 -requests-toolbelt==1.0.0 +Flask==3.0.3 +idna==3.10 +importlib_metadata==8.6.1 +itsdangerous==2.2.0 +Jinja2==3.1.5 +loguru==0.7.3 +MarkupSafe==3.0.2 +narwhals==1.26.0 +nest-asyncio==1.6.0 +numpy==2.0.2 +packaging==24.2 +pandas==2.2.3 +plotly==6.0.0 +python-dateutil==2.9.0.post0 +pytz==2025.1 +requests==2.32.3 retrying==1.3.4 -rfc3986==2.0.0 -rich==13.7.0 -rpds-py==0.18.1 -scipy==1.11.4 -selenium==4.16.0 -six==1.16.0 -sniffio==1.3.0 -sortedcontainers==2.4.0 -soupsieve==2.5 -stack-data==0.6.3 -statsmodels==0.14.1 -tenacity==8.2.3 -tinycss2==1.3.0 -tomli==2.0.1 -tomlkit==0.12.3 -tornado==6.4 -tqdm==4.66.1 -traitlets==5.14.3 -trio==0.24.0 -trio-websocket==0.11.1 -twine==4.0.2 -typed-ast==1.5.5 -typing==3.7.4.3 -typing_extensions==4.9.0 -tzdata==2023.4 -urllib3==2.1.0 -wcwidth==0.2.13 -webencodings==0.5.1 -Werkzeug==3.0.1 -wrapt==1.16.0 -wsproto==1.2.0 -yarg==0.1.9 -yarl==1.9.4 -zipp==3.17.0 +six==1.17.0 +typing_extensions==4.12.2 +tzdata==2025.1 +urllib3==2.3.0 +Werkzeug==3.0.6 +zipp==3.21.0 diff --git a/tests/analytics_test.py b/tests/test_MatchAnalytics.py similarity index 56% rename from tests/analytics_test.py rename to tests/test_MatchAnalytics.py index 9d94bf2..fa706d2 100644 --- a/tests/analytics_test.py +++ b/tests/test_MatchAnalytics.py @@ -1,5 +1,5 @@ import unittest -import analytics +import app.analytics.MatchAnalytics as MatchAnalytics USER_FILE_PATH = 'tests/test_user.json' MATCHES_FILE_PATH = 'tests/test_matches.json' @@ -7,28 +7,28 @@ class AnalyticsTest(unittest.TestCase): def test_total_event_count(self): - test_events = analytics.prepare_uploaded_match_data(MATCHES_FILE_PATH) - total_events = analytics.total_counts(test_events) + test_events = MatchAnalytics.prepare_uploaded_match_data(MATCHES_FILE_PATH) + total_events = MatchAnalytics.total_counts(test_events) self.assertEqual(total_events.size, 8) def test_invalid_file_type(self): with self.assertRaises(ValueError): - analytics.prepare_uploaded_match_data('tests/matches.csv') + MatchAnalytics.prepare_uploaded_match_data('tests/matches.csv') def test_invalid_match_file_upload(self): with self.assertRaises(ValueError): - analytics.prepare_uploaded_match_data(USER_FILE_PATH) + MatchAnalytics.prepare_uploaded_match_data(USER_FILE_PATH) def test_invalid_user_file_upload(self): with self.assertRaises(ValueError): - analytics.import_user_account_data(MATCHES_FILE_PATH) + MatchAnalytics.import_user_account_data(MATCHES_FILE_PATH) def test_account_data_import(self): - results = analytics.import_user_account_data(USER_FILE_PATH) + results = MatchAnalytics.import_user_account_data(USER_FILE_PATH) self.assertEqual(len(results), 9) # 9 keys in the dictionary def test_device_data_import(self): - results = analytics.import_user_device_data(USER_FILE_PATH) + results = MatchAnalytics.import_user_device_data(USER_FILE_PATH) self.assertEqual(len(results), 5)