From 2dac0ccbb58fea6a2c31ab918d448ca32b40e264 Mon Sep 17 00:00:00 2001
From: Diana Vostrova <128544496+Diana-Vostrova@users.noreply.github.com>
Date: Thu, 24 Apr 2025 11:42:42 +0300
Subject: [PATCH 1/2] Blog post about Interpretable SHAP for Credit Risk
Scoring
---
content/docs/Groups/SHAP_for_credit_risk.md | 360 ++++++++++++++++++
public/404.html | 2 +-
...HAP_Contributions_Risky_vs_Safe_client.png | Bin 0 -> 59094 bytes
...ifference_Ours_vs_shap.KernelExplainer.png | Bin 0 -> 56570 bytes
.../12_SHAP_Value_Comparison.png | Bin 0 -> 33975 bytes
.../1_Bar_plot_for_1_prediction.png | Bin 0 -> 274497 bytes
...p_10_Important_Features_for_Prediction.png | Bin 0 -> 201905 bytes
.../4_SHAP_Waterfall_Chart.png | Bin 0 -> 163847 bytes
.../SHAP_for_credit_risk/5_Summary_plot.png | Bin 0 -> 299862 bytes
.../6_Interaction_age_by_duration.png | Bin 0 -> 175791 bytes
..._credit_history_by_other_payment_plans.png | Bin 0 -> 64880 bytes
.../8_Full_SHAP_Interation_Value_matrix.png | Bin 0 -> 94609 bytes
...raction_comparison_risk_vs_safe_client.png | Bin 0 -> 93703 bytes
.../force_plot_custom.html | 14 +
public/categories/index.html | 4 +-
.../ai-playing-geoguessr-explained/index.html | 29 +-
public/docs/groups/cam_and_secam/index.html | 23 +-
.../index.html | 26 +-
.../index.html | 28 +-
.../index.html | 24 +-
public/docs/groups/dndfs_shap/index.html | 14 +-
public/docs/groups/example/index.html | 16 +-
public/docs/groups/gradcam/index.html | 20 +-
.../groups/integrated-gradients/index.html | 27 +-
public/docs/groups/kernel-shap/index.html | 11 +-
public/docs/groups/rag/index.html | 23 +-
.../groups/shap_darya_and_viktoria/index.html | 17 +-
.../groups/shap_for_credit_risk/index.html | 121 ++++++
public/docs/groups/sverl_tac_toe/index.html | 28 +-
public/docs/groups/torchprism/index.html | 44 ++-
.../groups/xai_for_transformers/index.html | 15 +-
public/docs/index.html | 4 +-
public/docs/index.xml | 300 +++++++++++++--
...047f78ae936102b6e97a0eb9c02b05e2a1665.json | 1 +
...9bd60a21ea3356bee5ba07de31fc95722807451.js | 1 +
public/index.html | 18 +-
public/index.xml | 300 +++++++++++++--
public/sitemap.xml | 2 +-
public/tags/index.html | 4 +-
...s_b807c86e8030af4cdc30edccea379f5f.content | 1 +
...scss_b807c86e8030af4cdc30edccea379f5f.json | 1 +
...HAP_Contributions_Risky_vs_Safe_client.png | Bin 0 -> 59094 bytes
...ifference_Ours_vs_shap.KernelExplainer.png | Bin 0 -> 56570 bytes
.../12_SHAP_Value_Comparison.png | Bin 0 -> 33975 bytes
.../1_Bar_plot_for_1_prediction.png | Bin 0 -> 274497 bytes
...p_10_Important_Features_for_Prediction.png | Bin 0 -> 201905 bytes
.../4_SHAP_Waterfall_Chart.png | Bin 0 -> 163847 bytes
.../SHAP_for_credit_risk/5_Summary_plot.png | Bin 0 -> 299862 bytes
.../6_Interaction_age_by_duration.png | Bin 0 -> 175791 bytes
..._credit_history_by_other_payment_plans.png | Bin 0 -> 64880 bytes
.../8_Full_SHAP_Interation_Value_matrix.png | Bin 0 -> 94609 bytes
...raction_comparison_risk_vs_safe_client.png | Bin 0 -> 93703 bytes
.../force_plot_custom.html | 14 +
53 files changed, 1362 insertions(+), 130 deletions(-)
create mode 100644 content/docs/Groups/SHAP_for_credit_risk.md
create mode 100644 public/SHAP_for_credit_risk/10_Waterfall_Comparison_of_SHAP_Contributions_Risky_vs_Safe_client.png
create mode 100644 public/SHAP_for_credit_risk/11_SHAP_Value_Difference_Ours_vs_shap.KernelExplainer.png
create mode 100644 public/SHAP_for_credit_risk/12_SHAP_Value_Comparison.png
create mode 100644 public/SHAP_for_credit_risk/1_Bar_plot_for_1_prediction.png
create mode 100644 public/SHAP_for_credit_risk/3_Top_10_Important_Features_for_Prediction.png
create mode 100644 public/SHAP_for_credit_risk/4_SHAP_Waterfall_Chart.png
create mode 100644 public/SHAP_for_credit_risk/5_Summary_plot.png
create mode 100644 public/SHAP_for_credit_risk/6_Interaction_age_by_duration.png
create mode 100644 public/SHAP_for_credit_risk/7_Interaction_credit_history_by_other_payment_plans.png
create mode 100644 public/SHAP_for_credit_risk/8_Full_SHAP_Interation_Value_matrix.png
create mode 100644 public/SHAP_for_credit_risk/9_SHAP_interaction_comparison_risk_vs_safe_client.png
create mode 100644 public/SHAP_for_credit_risk/force_plot_custom.html
create mode 100644 public/docs/groups/shap_for_credit_risk/index.html
create mode 100644 public/en.search-data.min.d1954b4c1fa008df67fe26a7bae047f78ae936102b6e97a0eb9c02b05e2a1665.json
create mode 100644 public/en.search.min.01b6b493e552379ed80b068779bd60a21ea3356bee5ba07de31fc95722807451.js
create mode 100644 resources/_gen/assets/book.scss_b807c86e8030af4cdc30edccea379f5f.content
create mode 100644 resources/_gen/assets/book.scss_b807c86e8030af4cdc30edccea379f5f.json
create mode 100644 static/SHAP_for_credit_risk/10_Waterfall_Comparison_of_SHAP_Contributions_Risky_vs_Safe_client.png
create mode 100644 static/SHAP_for_credit_risk/11_SHAP_Value_Difference_Ours_vs_shap.KernelExplainer.png
create mode 100644 static/SHAP_for_credit_risk/12_SHAP_Value_Comparison.png
create mode 100644 static/SHAP_for_credit_risk/1_Bar_plot_for_1_prediction.png
create mode 100644 static/SHAP_for_credit_risk/3_Top_10_Important_Features_for_Prediction.png
create mode 100644 static/SHAP_for_credit_risk/4_SHAP_Waterfall_Chart.png
create mode 100644 static/SHAP_for_credit_risk/5_Summary_plot.png
create mode 100644 static/SHAP_for_credit_risk/6_Interaction_age_by_duration.png
create mode 100644 static/SHAP_for_credit_risk/7_Interaction_credit_history_by_other_payment_plans.png
create mode 100644 static/SHAP_for_credit_risk/8_Full_SHAP_Interation_Value_matrix.png
create mode 100644 static/SHAP_for_credit_risk/9_SHAP_interaction_comparison_risk_vs_safe_client.png
create mode 100644 static/SHAP_for_credit_risk/force_plot_custom.html
diff --git a/content/docs/Groups/SHAP_for_credit_risk.md b/content/docs/Groups/SHAP_for_credit_risk.md
new file mode 100644
index 0000000..594a31c
--- /dev/null
+++ b/content/docs/Groups/SHAP_for_credit_risk.md
@@ -0,0 +1,360 @@
+---
+weight: 1
+bookFlatSection: true
+title: "Interpretable SHAP for Credit Risk Scoring"
+---
+
+
+
+
+# Interpretable SHAP for Credit Risk Scoring
+
+## Table of Contents
+
+- [1. Introduction](#1-introduction)
+- [2. Application Domain](#2-application-domain)
+- [3. Methodology](#3-methodology)
+ - [3.1 Dataset and Model Overview](#31-dataset-and-model-overview)
+ - [3.2 Explainability Method: SHAP](#32-explainability-method-shap)
+- [4. Implementation: Applying SHAP to RandomForest](#4-implementation-applying-shap-to-randomforest)
+ - [4.1 Implementation Overview](#41-implementation-overview)
+ - [4.2 How to Use It](#42-how-to-use-it)
+- [5. Experiments and Analysis](#5-experiments-and-analysis)
+ - [5.1 Evaluation Metrics](#51-evaluation-metrics)
+ - [5.2 Explanation Techniques and Visualizations](#52-explanation-techniques-and-visualizations)
+ - [5.3 Interpretations and Findings](#53-interpretations-and-findings)
+- [6. My Implementation](#6-my-implementation)
+- [7. Conclusion, Future Work, Limitations and Ethical Considerations](#7-conclusion-future-work-limitations-and-ethical-considerations)
+- [8. References](#8-references)
+
+---
+
+## 1. Introduction
+
+This project explores how SHAP (SHapley Additive exPlanations) can be used to enhance interpretability in credit scoring. The core objective is to implement SHAP from scratch and apply it to a Random Forest model trained on a credit dataset.
+
+**Research Question:** *Can we accurately replicate SHAP value explanations without relying on libraries, and how useful are these explanations for understanding credit risk decisions made by machine learning models?*
+
+SHAP was chosen due to its strong theoretical grounding in cooperative game theory and its ability to produce consistent, locally accurate feature attributions. This makes it particularly suitable for credit scoring applications where decisions have significant human impact.
+
+By replicating SHAP manually, we aim to:
+
+- Gain deeper insight into the mechanics of explainable AI
+- Validate model decisions through transparent breakdowns
+- Provide stakeholders and regulators with interpretable model behavior
+
+This research bridges the gap between theoretical fairness requirements and practical implementation in real-world credit assessment systems.
+
+
+---
+
+## 2. Application Domain
+
+Credit scoring is a critical application within financial services, where institutions assess an applicant's ability to repay a loan. An accurate and interpretable model helps mitigate financial risk and ensures transparency in lending decisions.
+
+By applying SHAP to a Random Forest model on the German Credit dataset, this project addresses the need for explainability in high-stakes domains. Regulatory frameworks (like the EU GDPR or U.S. Equal Credit Opportunity Act) increasingly require that decisions be interpretable and non-discriminatory. Hence, this research not only contributes technically but also aligns with real-world financial compliance needs.
+
+In this application domain, local explanations can help loan officers understand individual decisions, while global explanations support model auditability and fairness assessments.
+
+
+
+---
+
+## 3. Methodology
+
+### 3.1 Dataset and Model Overview
+
+We used the [**German Credit Dataset**](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data)), a structured dataset frequently applied in credit risk modeling research. It contains 1000 samples with 20 features and a binary target variable indicating creditworthiness (good or bad credit risk). The features include a mix of categorical and numerical variables such as:
+
+- `checking_status`: status of existing checking account
+- `duration`: duration in months
+- `credit_history`: credit history records
+- `purpose`: purpose of the loan (e.g., car, education)
+- `credit_amount`: amount of credit requested
+- `savings_status`: status of savings account/bonds
+- `employment`: years of current employment
+- `installment_commitment`: installment rate as a percentage of income
+- `personal_status`: personal status and sex of the applicant
+- `other_parties`: other debtors/guarantors
+- `residence_since`: years living at current residence
+- `property_magnitude`: value of assets
+- `age`: applicant's age
+- `other_payment_plans`: presence of other installment plans
+- `housing`: housing situation
+- `existing_credits`: number of existing credits
+- `job`: type of job
+- `num_dependents`: number of dependents
+- `own_telephone`: ownership of a telephone
+- `foreign_worker`: foreign worker status
+
+These features offer a comprehensive view of an individual's financial and personal profile relevant to credit decision-making.
+
+We selected the **Random Forest classifier** due to its ensemble-based structure that combines multiple decision trees to improve predictive performance and reduce overfitting. Random Forest is particularly suitable for tabular data with mixed data types (categorical and numerical), and it performs implicit feature selection by evaluating feature importance across many trees. Each tree in the forest is trained on a bootstrap sample of the data and considers a random subset of features at each split, which enhances diversity and robustness. Furthermore, Random Forest supports probability estimation and has interpretable decision paths, making it a practical and explainable choice for credit risk modeling.
+
+**Why Random Forest?**
+
+- **Robust performance on tabular data** — It effectively handles both numerical and categorical variables.
+- **Resistance to overfitting** — Uses bootstrap aggregation and feature randomness.
+- **Feature importance** — Naturally provides importance rankings useful for interpretability.
+- **Interpretability** — Easier to interpret than black-box models like neural networks.
+- **Practicality** — Widely supported, scalable, and requires minimal hyperparameter tuning.
+
+### 3.2 Explainability Method: SHAP
+
+SHAP (SHapley Additive exPlanations) is an explainability framework based on Shapley values from cooperative game theory. It provides consistent and locally accurate attributions of a model’s output to each input feature.
+
+In SHAP, the prediction task is treated as a game in which the "players" are the input features. The goal is to fairly allocate the model output (gain) among the features based on their contribution to the prediction.
+
+#### How SHAP Works
+
+- It considers all possible feature combinations (subsets) and computes the marginal contribution of a feature across these combinations.
+- Each SHAP value represents how much a feature contributes to the difference between the actual prediction and the mean prediction (baseline).
+- The final explanation is a sum of these SHAP values and the base value.
+
+Mathematically, for a model \(f\), the SHAP value for feature \(i\) is:
+
+$$
+\phi_i = \sum_{S \subseteq F \setminus \{i\}} \frac{|S|!(|F| - |S| - 1)!}{|F|!} \left[ f(S \cup \{i\}) - f(S) \right]
+$$
+
+Where:
+
+- \(F\): the set of all features
+- \(S\): a subset of features excluding \(i\)
+
+This formulation ensures fairness (symmetry), efficiency (conservation of output), and consistency across feature attributions.
+
+SHAP is model-agnostic and provides a unified measure of feature importance for both global and local interpretability.
+
+### 3.3 Kernel SHAP Approximation
+
+Kernel SHAP is a model-agnostic approximation algorithm that estimates SHAP values using a weighted linear regression. It is especially useful when the exact computation of Shapley values is computationally infeasible, such as in models with many input features.
+
+Key aspects of Kernel SHAP:
+
+- It treats the original model as a black box.
+- It samples feature subsets and queries the model to observe how predictions change.
+- It solves a weighted least squares problem to estimate each feature's SHAP value.
+- Weights are chosen according to the Shapley kernel, which assigns the highest influence to coalitions that are very small or very large (i.e., those containing very few or almost all features), since these are most informative about individual feature contributions.
+
+Kernel SHAP combines the strengths of LIME (local surrogate models) and Shapley theory, and provides theoretically grounded explanations even when the model internals are not accessible.
+
+Since computing all {{