spottseng · spottseng · Apr 24, 2025 · Apr 23, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/.env_example b/.env_example
@@ -1,5 +1,6 @@
-HOST=0.0.0.0
+HOST='0.0.0.0'
 PORT=8050
 USER_FILE_PATH='file/path/user.json'
+MATCH_FILE_PATH='file/path/matches.json'
 ASSETS_PATH='app/assets/'
 GEOLITE_DB_PATH='data/GeoLite2-City.mmdb'
diff --git a/app/analytics/MatchAnalytics.py b/app/analytics/MatchAnalytics.py
@@ -1,132 +1,119 @@
-import pandas as pd
-import re
-import json
-
-
-def prepare_uploaded_match_data(file_path="../data/app_uploaded_files/matches.json"):
-    __validate_upload_file_type(file_path)
-    __validate_match_file_upload(file_path)
-
-    with open(file_path, 'r') as file:
-        # match upload data is a list of dictionaries
-        match_upload_data = json.load(file)
-
-    events = []
-    for interaction, all_actions in enumerate(match_upload_data):
-        # action type is like, match, chats, blocks, overarching "action"
-        for action_type, actions in all_actions.items():
-            # action is the metadata assoc. one event of the action type
-            for action in actions:
-                action["interaction_id"] = interaction
-                events.append(action)
-
-    return pd.DataFrame(events).sort_values("timestamp")
-
-
-def date_count_distribution(events):
-    chat_events = events[events["type"] == "chats"]
-    chats_per_interaction = chat_events.groupby('interaction_id').size()
-
-    # convert the Series to a DataFrame with specified column names, have to reset the index
-    interaction_counts = chats_per_interaction.to_frame().reset_index()
-    interaction_counts.columns = ['interaction_id', 'outgoing_messages']
-    return interaction_counts
-
-
-def activity_by_date(events):
-    events['activity_date'] = pd.DatetimeIndex(events["timestamp"]).date
-
-    event_type_counts_by_date = events.groupby(['activity_date', 'type']).size()
-
-    # creating a DataFrame from the Series, have to reset the index
-    action_type_freq_per_day = pd.DataFrame(event_type_counts_by_date).reset_index()
-    action_type_freq_per_day.columns = ['activity_date', 'type', 'count']
-
-    return action_type_freq_per_day
-
-
-def analyze_double_likes(events):
-    like_events = events[events["type"] == "like"]
-    multi_like_event_count = len(like_events.groupby('interaction_id').filter(lambda x: len(x) > 1))
-    single_like_event_count = len(like_events) - multi_like_event_count
-
-    single_vs_double_likes = pd.DataFrame(
-        [['Single Like', single_like_event_count], ['Multiple Likes', multi_like_event_count]],
-        columns=["Like Frequency", "Count"])
-
-    return single_vs_double_likes
-
-
-def total_counts(events):
-    distinct_interaction_count = len(pd.unique(events['interaction_id']))
-    like_event_count = len(events[events['type'] == "like"])
-    match_event_count = len(events[events['type'] == "match"])
-
-    chat_events = events[events['type'] == "chats"]
-    chat_event_count = len(chat_events.interaction_id.unique())
-
-    totals = pd.DataFrame(
-        [['Distinct Interactions', distinct_interaction_count], ['Outgoing Likes', like_event_count],
-         ['Matches', match_event_count],
-         ['Chats', chat_event_count]],
-        columns=["action_type", "count"])
-    return totals
-
-
-def commented_outgoing_likes(events):
-    likes_w_comments = __build_comments_list(events)
-
-    return pd.DataFrame(likes_w_comments, columns=["Comments"])
-
-
-def like_comment_ratios(events):
-    likes_w_comments = __build_comments_list(events)
-    likes_wo_comment = len(events) - len(likes_w_comments)
-
-    likes_w_wo_comments = pd.DataFrame(
-        [['Likes with Comments', len(likes_w_comments)], ['Likes without Comments', likes_wo_comment]],
-        columns=["Likes With/ Without Comments", "Count"])
-    return likes_w_wo_comments
-
-
-def phone_number_shares(events):
-    chats_w_messages = events.where(events["type"] == "chats")
-    chats_w_messages = chats_w_messages[chats_w_messages['body'].notna()]
-    total_messages_w_chats = len(chats_w_messages)
-
-    message_bodies = chats_w_messages['body']
-
-    phone_number_shared = []
-    for message in message_bodies:
-        # finds common phone number formats in the message: XXX-XXX-XXXX, XXX.XXX.XXXX, (XXX) XXX-XXXX
-        message_containing_number = re.findall(r"\(?\d{3}\)?[-.\s]\d{3}[-.\s]\d{4}", message)
-
-        if len(message_containing_number) >= 1:
-            phone_number_shared.append(message_containing_number)
-
-    phone_number_share_ratios = pd.DataFrame([['Gave Phone Number', len(phone_number_shared)],
-                                              ['Did Not Give Phone Number', total_messages_w_chats]],
-                                             columns=["Message Outcomes", "Count"])
-    return phone_number_share_ratios
-
-def __build_comments_list(events):
-    likes_w_comments = []
-    like_events = events["like"].dropna()
-    for value in like_events:
-        # likes are an array with a single element (most of the time)
-        # TODO: handle multiple likes, it's rare but possible
-        like_event = value[0]
-        if like_event.get('comment') is not None:
-            likes_w_comments.append(like_event.get('comment'))
-
-    return likes_w_comments
-
-
-def __validate_upload_file_type(file_path):
-    if not file_path.endswith('.json'):
-        raise ValueError("Invalid file type. Please upload a JSON file.")
-
-
-def __validate_match_file_upload(file_path):
-    if 'match' not in file_path:
-        raise ValueError("Invalid file name. Please upload a file with 'match' in the file name.")
+import json, os
+from datetime import datetime, timedelta
+
+class MatchAnalytics:
+    def __init__(self):
+        self.match_file_path = os.environ.get("MATCH_FILE_PATH")
+
+        if self.match_file_path is None:
+            raise Exception("MATCH_FILE_PATH environment varviable is not set.")
+
+        if '.json' not in self.match_file_path:
+            raise Exception("The match file needs to be a JSON file.")
+
+        with open(self.match_file_path, 'r') as file:
+            match_data = json.load(file)
+        self.match_data = match_data
+
+    def get_match_data(self):
+        all_matches = []
+        for entry in self.match_data:
+            matches = entry.get("match", [])
+            all_matches.extend(matches)
+        return all_matches
+
+    def get_block_data(self):
+        all_blocks = []
+        for entry in self.match_data:
+            blocks = entry.get("block", [])
+            all_blocks.extend(blocks)
+        return all_blocks 
+
+    def get_likes_data(self):
+        all_likes = []
+        for entry in self.match_data:
+            likes = entry.get("like", [])
+            all_likes.extend(likes)
+        return all_likes
+
+    def get_chat_data(self):
+        all_chats = []
+        for entry in self.match_data:
+            chats = entry.get("chats", [])
+            all_chats.extend(chats)
+        return all_chats 
+
+    def get_message_count_last_12_months(self):
+        now = datetime.now()
+        one_year_ago = now - timedelta(days=365)
+
+        msg_counts_per_month = []
+        for entry in self.match_data:
+            match = entry.get("match", [])
+            chats = entry.get("chats", [])
+            if match:
+                match_time = datetime.fromisoformat(match[0]["timestamp"])
+                if match_time >= one_year_ago:
+                    month = match_time.strftime("%Y-%m")
+                    msg_counts_per_month.append({
+                        "month": month,
+                        "message_count": len(chats)
+                    })
+        return msg_counts_per_month
+
+    def get_response_latency(self):
+        latency_data = []
+        for entry in self.match_data:
+            match = entry.get("match", [])
+            chats = entry.get("chats", [])
+
+            if match and chats:
+                match_time = datetime.fromisoformat(match[0]["timestamp"])
+                first_message_time = datetime.fromisoformat(chats[0]["timestamp"])
+                latency = (first_message_time - match_time).total_seconds() / (3600 * 24)
+
+                latency_data.append({
+                    "match_time": match_time,
+                    "first_message_time": first_message_time,
+                    "latency_days": latency
+                })
+        return latency_data
+
+    def get_match_durations(self):
+        durations = []
+        for entry in self.match_data:
+            match = entry.get("match", [])
+            block = entry.get("block", [])
+
+            if match and block:
+                match_time = datetime.fromisoformat(match[0]["timestamp"])
+                block_time = datetime.fromisoformat(block[0]["timestamp"])
+                duration_days = (block_time - match_time).days
+
+                durations.append({
+                    "match_time": match_time,
+                    "block_time": block_time,
+                    "duration_days": duration_days
+                })
+        return durations
+
+    def get_match_rm_counts(self):
+        records = []
+
+        for entry in self.match_data:
+            match_time = entry.get("match", [{}])[0].get("timestamp")
+            block_time = entry.get("block", [{}])[0].get("timestamp")
+            chats = entry.get("chats", [])
+
+            if match_time and block_time:
+                match_dt = datetime.fromisoformat(match_time)
+                block_dt = datetime.fromisoformat(block_time)
+                delta_days = (block_dt - match_dt).days
+                message_count = len(chats)
+
+                records.append({
+                    "message_count": message_count,
+                    "duration_days": delta_days
+                })
+
+        return records
diff --git a/app/app.py → app/main.py b/app/app.py → app/main.py