From 99dff0a4e780c3774eef987c7205cbd79aec255c Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 4 Dec 2025 00:47:56 +0000
Subject: [PATCH 1/2] feat(dashboard): add interactive Supabase complaints dashboard

- Add Streamlit dashboard with live Supabase connection
- Implement filter dropdowns: category, severity, status, date range, customer segment
- Add dynamic statistics that update with filters
- Add interactive charts: time series (daily/weekly/monthly toggle), category breakdown, severity distribution, resolution time distribution
- Add outlier analysis section with tabs for long resolution, high compensation, and zero-day cases
- Add repeat complainers section
- Include caching with 5-minute TTL for performance

Co-Authored-By: Joao Esteves
---
 dashboard/app.py           | 484 +++++++++++++++++++++++++++++++++++++
 dashboard/requirements.txt |   4 +
 2 files changed, 488 insertions(+)
 create mode 100644 dashboard/app.py
 create mode 100644 dashboard/requirements.txt

diff --git a/dashboard/app.py b/dashboard/app.py
new file mode 100644
index 00000000..5f5ac1ee
--- /dev/null
+++ b/dashboard/app.py
@@ -0,0 +1,484 @@
+"""
+Interactive Complaints Dashboard with Supabase Connection.
+
+This Streamlit dashboard provides real-time analysis of customer complaints
+with filtering, dynamic statistics, and interactive charts.
+"""
+
+import streamlit as st
+import pandas as pd
+import plotly.express as px
+from datetime import datetime
+from typing import Dict, Any
+from supabase import create_client, Client
+
+# The anon key is intended to be shipped client-side; table access is
+# governed by Supabase Row Level Security policies (see the follow-up commit).
+SUPABASE_URL = "https://nvpgcrcvrouihxsakovx.supabase.co"
+SUPABASE_ANON_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im52cGdjcmN2cm91aWh4c2Frb3Z4Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NjQ3OTIyNjEsImV4cCI6MjA4MDM2ODI2MX0.JXZUirdsnvxp6UWZznATkFcVVYv-EnmTH6FjsYMD0ZI"
+
+st.set_page_config(
+    page_title="Complaints Dashboard",
+    page_icon="📊",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Minimal styling for the custom HTML rendered below; the class names are
+# assumed and only need to match the header banner and repeat-complainer
+# cards further down in this file.
+st.markdown("""
+<style>
+.main-header {
+    background-color: #ec0000;
+    color: white;
+    padding: 1rem 1.5rem;
+    border-radius: 0.5rem;
+    margin-bottom: 1rem;
+}
+.repeat-card {
+    border: 1px solid #ec0000;
+    border-radius: 0.5rem;
+    padding: 0.75rem 1rem;
+    margin-bottom: 0.5rem;
+}
+</style>
+""", unsafe_allow_html=True)
+
+
+@st.cache_resource
+def get_supabase_client() -> Client:
+    """Initialize and return Supabase client."""
+    return create_client(SUPABASE_URL, SUPABASE_ANON_KEY)
+
+
+@st.cache_data(ttl=300)
+def load_complaints_data() -> pd.DataFrame:
+    """
+    Load complaints data from Supabase with caching.
+
+    Returns:
+        DataFrame containing all complaints data.
+    """
+    supabase = get_supabase_client()
+    response = supabase.table("santander_customer_complaints").select("*").execute()
+
+    if response.data:
+        df = pd.DataFrame(response.data)
+        if "complaint_date" in df.columns:
+            df["complaint_date"] = pd.to_datetime(df["complaint_date"])
+        if "compensation_amount" in df.columns:
+            df["compensation_amount"] = pd.to_numeric(
+                df["compensation_amount"], errors="coerce"
+            ).fillna(0)
+        return df
+    return pd.DataFrame()
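+
+
+# Caching note: get_supabase_client() is cached with st.cache_resource, so a
+# single client instance is reused across reruns and sessions, while
+# load_complaints_data() uses st.cache_data(ttl=300) and therefore queries
+# Supabase at most once every five minutes; the sidebar "Refresh Data"
+# button clears that cache on demand.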
+ """ + if df.empty: + return { + "total": 0, + "avg_resolution": 0, + "median_resolution": 0, + "total_compensation": 0, + "avg_compensation": 0, + "repeat_complainers": 0 + } + + resolution_days = df["resolution_days"].fillna(0) + compensation = df["compensation_amount"].fillna(0) + + customer_counts = df["customer_id"].value_counts() + repeat_complainers = (customer_counts >= 3).sum() + + return { + "total": len(df), + "avg_resolution": resolution_days.mean(), + "median_resolution": resolution_days.median(), + "total_compensation": compensation.sum(), + "avg_compensation": compensation.mean(), + "repeat_complainers": repeat_complainers + } + + +def get_time_series_data( + df: pd.DataFrame, + granularity: str +) -> pd.DataFrame: + """ + Aggregate complaints by time period. + + Args: + df: DataFrame containing complaints data. + granularity: Time granularity ('Daily', 'Weekly', 'Monthly'). + + Returns: + DataFrame with time series aggregation. + """ + if df.empty or "complaint_date" not in df.columns: + return pd.DataFrame() + + df_copy = df.copy() + + if granularity == "Daily": + df_copy["period"] = df_copy["complaint_date"].dt.date + elif granularity == "Weekly": + df_copy["period"] = df_copy["complaint_date"].dt.to_period("W").dt.start_time.dt.date + else: + df_copy["period"] = df_copy["complaint_date"].dt.to_period("M").dt.start_time.dt.date + + time_series = df_copy.groupby("period").agg( + count=("complaint_id", "count"), + avg_compensation=("compensation_amount", "mean") + ).reset_index() + + time_series["period"] = pd.to_datetime(time_series["period"]) + return time_series.sort_values("period") + + +def get_category_breakdown(df: pd.DataFrame) -> pd.DataFrame: + """ + Get complaint counts and average compensation by category. + + Args: + df: DataFrame containing complaints data. + + Returns: + DataFrame with category breakdown. + """ + if df.empty: + return pd.DataFrame() + + breakdown = df.groupby("category").agg( + count=("complaint_id", "count"), + avg_compensation=("compensation_amount", "mean"), + avg_resolution=("resolution_days", "mean") + ).reset_index() + + return breakdown.sort_values("count", ascending=False) + + +def get_outliers(df: pd.DataFrame) -> Dict[str, pd.DataFrame]: + """ + Identify outlier complaints. + + Args: + df: DataFrame containing complaints data. + + Returns: + Dictionary with outlier DataFrames. + """ + return { + "long_resolution": df[df["resolution_days"] > 60].nlargest( + 10, "resolution_days" + ), + "high_compensation": df[df["compensation_amount"] > 300].nlargest( + 10, "compensation_amount" + ), + "zero_day": df[df["resolution_days"] == 0].head(10) + } + + +def get_repeat_complainers(df: pd.DataFrame) -> pd.DataFrame: + """ + Identify customers with 3+ complaints. + + Args: + df: DataFrame containing complaints data. + + Returns: + DataFrame with repeat complainer details. + """ + if df.empty: + return pd.DataFrame() + + customer_stats = df.groupby("customer_id").agg( + complaint_count=("complaint_id", "count"), + categories=("category", lambda x: list(set(x))), + total_compensation=("compensation_amount", "sum"), + avg_resolution=("resolution_days", "mean") + ).reset_index() + + repeat = customer_stats[customer_stats["complaint_count"] >= 3] + return repeat.sort_values("complaint_count", ascending=False).head(10) + + +def main(): + """Main dashboard application.""" + st.markdown('

Santander Complaints Dashboard

', unsafe_allow_html=True) + + with st.spinner("Loading data from Supabase..."): + df = load_complaints_data() + + if df.empty: + st.error("No data available. Please check the Supabase connection.") + return + + st.sidebar.header("Filters") + + if st.sidebar.button("Refresh Data"): + st.cache_data.clear() + st.rerun() + + categories = ["All"] + sorted(df["category"].dropna().unique().tolist()) + selected_category = st.sidebar.selectbox("Category", categories) + + severities = ["All"] + sorted(df["severity"].dropna().unique().tolist()) + selected_severity = st.sidebar.selectbox("Severity", severities) + + statuses = ["All"] + sorted(df["status"].dropna().unique().tolist()) + selected_status = st.sidebar.selectbox("Status", statuses) + + segments = ["All"] + sorted(df["customer_segment"].dropna().unique().tolist()) + selected_segment = st.sidebar.selectbox("Customer Segment", segments) + + st.sidebar.subheader("Date Range") + min_date = df["complaint_date"].min().date() + max_date = df["complaint_date"].max().date() + + date_from = st.sidebar.date_input("From", min_date, min_value=min_date, max_value=max_date) + date_to = st.sidebar.date_input("To", max_date, min_value=min_date, max_value=max_date) + + filtered_df = df.copy() + + if selected_category != "All": + filtered_df = filtered_df[filtered_df["category"] == selected_category] + if selected_severity != "All": + filtered_df = filtered_df[filtered_df["severity"] == selected_severity] + if selected_status != "All": + filtered_df = filtered_df[filtered_df["status"] == selected_status] + if selected_segment != "All": + filtered_df = filtered_df[filtered_df["customer_segment"] == selected_segment] + + filtered_df = filtered_df[ + (filtered_df["complaint_date"].dt.date >= date_from) & + (filtered_df["complaint_date"].dt.date <= date_to) + ] + + stats = calculate_stats(filtered_df) + + st.subheader("Key Metrics") + col1, col2, col3, col4, col5, col6 = st.columns(6) + + with col1: + st.metric("Total Complaints", f"{stats['total']:,}") + with col2: + st.metric("Avg Resolution", f"{stats['avg_resolution']:.1f} days") + with col3: + st.metric("Median Resolution", f"{stats['median_resolution']:.1f} days") + with col4: + st.metric("Total Compensation", f"£{stats['total_compensation']:,.2f}") + with col5: + st.metric("Avg Compensation", f"£{stats['avg_compensation']:.2f}") + with col6: + st.metric("Repeat Complainers", f"{stats['repeat_complainers']}") + + st.markdown("---") + + st.subheader("Complaints Over Time") + granularity = st.radio( + "Time Granularity", + ["Daily", "Weekly", "Monthly"], + horizontal=True, + index=1 + ) + + time_series = get_time_series_data(filtered_df, granularity) + + if not time_series.empty: + fig_time = px.line( + time_series, + x="period", + y="count", + title=f"Complaints ({granularity})", + labels={"period": "Date", "count": "Number of Complaints"} + ) + fig_time.update_traces(line_color="#ec0000", line_width=2) + fig_time.update_layout( + hovermode="x unified", + plot_bgcolor="white", + paper_bgcolor="white" + ) + st.plotly_chart(fig_time, use_container_width=True) + else: + st.info("No time series data available for the selected filters.") + + st.markdown("---") + + col_left, col_right = st.columns(2) + + with col_left: + st.subheader("Complaints by Category") + category_data = get_category_breakdown(filtered_df) + + if not category_data.empty: + fig_cat = px.bar( + category_data, + x="count", + y="category", + orientation="h", + title="Category Distribution", + labels={"count": "Number of Complaints", "category": 
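+
+
+# The thresholds above are fixed business cutoffs rather than statistical
+# ones: outliers are flagged at >60 resolution days and >£300 compensation,
+# and a "repeat complainer" is any customer with 3+ complaints (the same
+# definition calculate_stats() uses for its headline metric).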
"Category"}, + color="avg_compensation", + color_continuous_scale=["#ffcccc", "#ec0000", "#8b0000"] + ) + fig_cat.update_layout( + plot_bgcolor="white", + paper_bgcolor="white", + yaxis={"categoryorder": "total ascending"} + ) + st.plotly_chart(fig_cat, use_container_width=True) + else: + st.info("No category data available.") + + with col_right: + st.subheader("Severity Distribution") + if not filtered_df.empty: + severity_counts = filtered_df["severity"].value_counts().reset_index() + severity_counts.columns = ["severity", "count"] + + fig_sev = px.pie( + severity_counts, + values="count", + names="severity", + title="Complaints by Severity", + color_discrete_sequence=["#ec0000", "#ff6666", "#ffcccc", "#8b0000"] + ) + fig_sev.update_traces(textposition="inside", textinfo="percent+label") + st.plotly_chart(fig_sev, use_container_width=True) + else: + st.info("No severity data available.") + + st.markdown("---") + + st.subheader("Resolution Time Distribution") + if not filtered_df.empty: + bins = [0, 1, 7, 14, 30, 60, float("inf")] + labels = ["0 days", "1-7 days", "8-14 days", "15-30 days", "31-60 days", "60+ days"] + filtered_df["resolution_bucket"] = pd.cut( + filtered_df["resolution_days"], + bins=bins, + labels=labels, + include_lowest=True + ) + + resolution_dist = filtered_df["resolution_bucket"].value_counts().reset_index() + resolution_dist.columns = ["bucket", "count"] + resolution_dist = resolution_dist.sort_values( + "bucket", + key=lambda x: pd.Categorical(x, categories=labels, ordered=True) + ) + + fig_res = px.bar( + resolution_dist, + x="bucket", + y="count", + title="Resolution Time Distribution", + labels={"bucket": "Resolution Time", "count": "Number of Complaints"}, + color="count", + color_continuous_scale=["#ffcccc", "#ec0000"] + ) + fig_res.update_layout( + plot_bgcolor="white", + paper_bgcolor="white", + showlegend=False + ) + st.plotly_chart(fig_res, use_container_width=True) + + st.markdown("---") + + st.subheader("Outlier Analysis") + outliers = get_outliers(filtered_df) + + tab1, tab2, tab3 = st.tabs([ + "Long Resolution (>60 days)", + "High Compensation (>£300)", + "Zero-Day Resolution" + ]) + + with tab1: + if not outliers["long_resolution"].empty: + st.dataframe( + outliers["long_resolution"][[ + "complaint_id", "customer_id", "category", + "severity", "resolution_days", "compensation_amount" + ]].head(10), + use_container_width=True + ) + else: + st.info("No long resolution outliers found.") + + with tab2: + if not outliers["high_compensation"].empty: + st.dataframe( + outliers["high_compensation"][[ + "complaint_id", "customer_id", "category", + "severity", "resolution_days", "compensation_amount" + ]].head(10), + use_container_width=True + ) + else: + st.info("No high compensation outliers found.") + + with tab3: + if not outliers["zero_day"].empty: + st.dataframe( + outliers["zero_day"][[ + "complaint_id", "customer_id", "category", + "severity", "resolution_days", "compensation_amount" + ]].head(10), + use_container_width=True + ) + else: + st.info("No zero-day resolution cases found.") + + st.markdown("---") + + st.subheader("Repeat Complainers (3+ complaints)") + repeat_complainers = get_repeat_complainers(filtered_df) + + if not repeat_complainers.empty: + for _, row in repeat_complainers.iterrows(): + with st.container(): + st.markdown(f""" +
+    st.subheader("Repeat Complainers (3+ complaints)")
+    repeat_complainers = get_repeat_complainers(filtered_df)
+
+    if not repeat_complainers.empty:
+        for _, row in repeat_complainers.iterrows():
+            with st.container():
+                st.markdown(f"""
+                <div class="repeat-card">
+                <strong>{row['customer_id']}</strong> - {row['complaint_count']} complaints<br>
+                Categories: {', '.join(row['categories'][:3])}{'...' if len(row['categories']) > 3 else ''}<br>
+                Total Compensation: £{row['total_compensation']:.2f} |
+                Avg Resolution: {row['avg_resolution']:.1f} days
+ """, unsafe_allow_html=True) + else: + st.info("No repeat complainers found in the filtered data.") + + st.markdown("---") + st.caption( + f"Data source: Supabase | " + f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} | " + f"Showing {len(filtered_df):,} of {len(df):,} complaints" + ) + + +if __name__ == "__main__": + main() diff --git a/dashboard/requirements.txt b/dashboard/requirements.txt new file mode 100644 index 00000000..14213291 --- /dev/null +++ b/dashboard/requirements.txt @@ -0,0 +1,4 @@ +streamlit>=1.28.0 +pandas>=2.0.0 +plotly>=5.18.0 +supabase>=2.0.0 From 8a65d2d758a7447e5d4c946c2ad62c6b8355eff8 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 4 Dec 2025 09:14:34 +0000 Subject: [PATCH 2/2] fix(dashboard): improve error message with RLS setup instructions Co-Authored-By: Joao Esteves --- dashboard/app.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/dashboard/app.py b/dashboard/app.py index 5f5ac1ee..480623ed 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -232,7 +232,22 @@ def main(): df = load_complaints_data() if df.empty: - st.error("No data available. Please check the Supabase connection.") + st.error(""" + **No data available.** This usually means Row Level Security (RLS) is blocking access. + + To fix this, add a SELECT policy for the `anon` role in Supabase: + + 1. Go to **Table editor** → `santander_customer_complaints` → **RLS / Policies** + 2. Click **New policy** → Action: **SELECT** → Role: **anon** → Using expression: `true` + 3. Save and refresh this page + + Or run this SQL in the SQL Editor: + ```sql + CREATE POLICY "Allow anonymous read access" + ON public.santander_customer_complaints + FOR SELECT TO anon USING (true); + ``` + """) return st.sidebar.header("Filters")