From f4b128bb18cc6c701ecc290c1f47566e9e863cd3 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sat, 27 Dec 2025 22:00:44 +0100 Subject: [PATCH 1/6] add venv to gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 5934c80c..682e8e30 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,8 @@ MANIFEST # Setuptools SCM doubleml/_version.py + +# Virtual environment +.venv +venv/ +env/ From 92d4ef3a9c24ea2e5ee0192443b7792bea369bdf Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sat, 27 Dec 2025 22:01:09 +0100 Subject: [PATCH 2/6] mvp support for maketables --- doubleml/double_ml.py | 3 +- doubleml/utils/maketables_mixin.py | 195 +++++++ doubleml/utils/tests/test_maketables_mixin.py | 490 ++++++++++++++++++ 3 files changed, 687 insertions(+), 1 deletion(-) create mode 100644 doubleml/utils/maketables_mixin.py create mode 100644 doubleml/utils/tests/test_maketables_mixin.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 0d10dea5..66a9eb80 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -12,6 +12,7 @@ from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework from doubleml.double_ml_sampling_mixins import SampleSplittingMixin from doubleml.utils._checks import _check_external_predictions +from doubleml.utils.maketables_mixin import MakeTablesMixin from doubleml.utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est from doubleml.utils._sensitivity import _compute_sensitivity_bias from doubleml.utils._tune_optuna import OPTUNA_GLOBAL_SETTING_KEYS, TUNE_ML_MODELS_DOC, resolve_optuna_cv @@ -20,7 +21,7 @@ _implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLSSMData", "DoubleMLRDDData"] -class DoubleML(SampleSplittingMixin, ABC): +class DoubleML(MakeTablesMixin, SampleSplittingMixin, ABC): """Double Machine Learning.""" def __init__(self, obj_dml_data, n_folds, n_rep, score, 
draw_sample_splitting, double_sample_splitting=False): diff --git a/doubleml/utils/maketables_mixin.py b/doubleml/utils/maketables_mixin.py new file mode 100644 index 00000000..5eb888bf --- /dev/null +++ b/doubleml/utils/maketables_mixin.py @@ -0,0 +1,195 @@ +""" +MakeTables Mixin for DoubleML Models. + +This module provides a mixin class that adds MakeTables plug-in support to DoubleML models. +The mixin implements the three required attributes for MakeTables compatibility: +- __maketables_coef_table__: Returns coefficient table as DataFrame +- __maketables_stat__: Returns model statistics by key +- __maketables_depvar__: Returns dependent variable name + +This enables zero-coupling integration with MakeTables - DoubleML never imports maketables, +but models automatically work with it when users have maketables installed. +""" + +import numpy as np +import pandas as pd + + +class MakeTablesMixin: + """ + Mixin class for MakeTables plug-in support. + + This mixin adds three attributes that enable DoubleML models to automatically work + with the MakeTables package for creating publication-ready regression tables. + + The plug-in format uses duck typing - MakeTables automatically detects these + attributes when present, without requiring any imports or dependencies. + + Attributes + ---------- + __maketables_coef_table__ : pd.DataFrame (property) + Coefficient table with columns 'b' (estimates), 'se' (standard errors), + 'p' (p-values), 't' (t-statistics), 'ci95l', 'ci95u' (95% CI bounds). + + __maketables_depvar__ : str (property) + Name of the dependent variable. + + __maketables_default_stat_keys__ : list (property) + Default statistics to display in tables. + + Methods + ------- + __maketables_stat__(key) + Return model statistic by key (e.g., 'N' for number of observations). 
+ + Examples + -------- + >>> from doubleml import DoubleMLPLR + >>> # After fitting a DoubleML model + >>> dml_plr.fit() + >>> # Access maketables attributes + >>> coef_table = dml_plr.__maketables_coef_table__ + >>> n_obs = dml_plr.__maketables_stat__('N') + >>> depvar = dml_plr.__maketables_depvar__ + """ + + @property + def __maketables_coef_table__(self) -> pd.DataFrame: + """ + Return coefficient table with all required and optional columns for MakeTables. + + Returns a pandas DataFrame with coefficient estimates, standard errors, p-values, + t-statistics, and 95% confidence intervals. The DataFrame index matches the + treatment variable names from the fitted model. + + Returns + ------- + pd.DataFrame + Coefficient table with columns: + - 'b': coefficient estimates (required) + - 'se': standard errors (required) + - 'p': p-values (required) + - 't': t-statistics (optional) + - 'ci95l': lower 95% confidence interval bound (optional) + - 'ci95u': upper 95% confidence interval bound (optional) + + Notes + ----- + - Returns empty DataFrame with correct columns if model is unfitted or all coefficients are NaN + - Index is set to match the summary table index (treatment variable names) + - Handles edge cases gracefully without raising errors + """ + # Handle unfitted model + if not hasattr(self, "coef") or self.coef is None: + return pd.DataFrame(columns=["b", "se", "t", "p", "ci95l", "ci95u"]) + + # Handle NaN coefficients (model fitted but no valid estimates) + if np.isnan(self.coef).all(): + return pd.DataFrame(columns=["b", "se", "t", "p", "ci95l", "ci95u"]) + + # Get 95% confidence intervals + ci = self.confint(level=0.95) + + # Build coefficient table with required and optional columns + coef_table = pd.DataFrame( + { + "b": self.coef, # Required: coefficient estimates + "se": self.se, # Required: standard errors + "p": self.pval, # Required: p-values + "t": self.t_stat, # Optional: t-statistics + "ci95l": ci.iloc[:, 0], # Optional: lower 95% CI bound + 
"ci95u": ci.iloc[:, 1], # Optional: upper 95% CI bound + } + ) + + # Set index to match summary table (handles treatment variable names) + if hasattr(self, "summary") and self.summary is not None and len(self.summary) > 0: + coef_table.index = self.summary.index + + return coef_table + + def __maketables_stat__(self, key: str): + """ + Return model statistic by key. + + Parameters + ---------- + key : str + The statistic key to retrieve. Common keys include: + - 'N': number of observations + - 'r2': R-squared (not applicable for DoubleML) + - 'adj_r2': adjusted R-squared (not applicable for DoubleML) + - 'aic': Akaike Information Criterion (not applicable for DoubleML) + - 'bic': Bayesian Information Criterion (not applicable for DoubleML) + - 'll': log-likelihood (not applicable for DoubleML) + + Returns + ------- + float, int, or None + The requested statistic value, or None if not available or not applicable. + + Notes + ----- + DoubleML focuses on causal inference, not prediction, so traditional model fit + statistics like R-squared, AIC, and BIC are not applicable and will return None. + Currently only 'N' (number of observations) is supported. + + Examples + -------- + >>> n_obs = dml_model.__maketables_stat__('N') + >>> r2 = dml_model.__maketables_stat__('r2') # Returns None + """ + stats_map = { + "N": self.n_obs if hasattr(self, "n_obs") else None, + } + return stats_map.get(key, None) + + @property + def __maketables_depvar__(self) -> str: + """ + Return the name of the dependent variable. + + Returns + ------- + str + Name of the dependent (outcome) variable. Defaults to "Y" if not available. + + Notes + ----- + Retrieves the dependent variable name from the DoubleMLData object's y_col attribute. + Falls back to "Y" if the attribute is not available. 
+ + Examples + -------- + >>> depvar = dml_model.__maketables_depvar__ + >>> print(depvar) + Y + """ + if hasattr(self, "_dml_data") and hasattr(self._dml_data, "y_col"): + return self._dml_data.y_col + return "Y" # Fallback + + @property + def __maketables_default_stat_keys__(self) -> list: + """ + Return default statistics to display in MakeTables output. + + Returns + ------- + list + List of statistic keys to display by default. For DoubleML models, + this is ['N'] (number of observations). + + Notes + ----- + This is an optional attribute that helps MakeTables know which statistics + to include in the table by default. Users can override this when calling + ETable() by specifying the model_stats parameter. + + Examples + -------- + >>> default_stats = dml_model.__maketables_default_stat_keys__ + >>> print(default_stats) + ['N'] + """ + return ["N"] diff --git a/doubleml/utils/tests/test_maketables_mixin.py b/doubleml/utils/tests/test_maketables_mixin.py new file mode 100644 index 00000000..870196c8 --- /dev/null +++ b/doubleml/utils/tests/test_maketables_mixin.py @@ -0,0 +1,490 @@ +""" +Tests for MakeTables Mixin. + +This module tests the MakeTables plug-in support for DoubleML models, +verifying that the mixin correctly provides coefficient tables, statistics, +and dependent variable names for use with the MakeTables package.
+""" + +import numpy as np +import pandas as pd +import pytest +from sklearn.linear_model import LinearRegression, Lasso + +import doubleml as dml + + +@pytest.fixture(scope="module") +def generate_plr_data(): + """Generate simple data for PLR model testing.""" + np.random.seed(42) + n = 500 + p = 5 + theta = 0.5 + + # Generate simple data + X = np.random.normal(size=(n, p)) + d = 0.5 * X[:, 0] + np.random.normal(size=n) + y = theta * d + X[:, 1] + np.random.normal(size=n) + + df = pd.DataFrame( + np.column_stack((X, y, d)), + columns=[f"X{i+1}" for i in range(p)] + ["Y", "D"] + ) + + return dml.DoubleMLData(df, "Y", "D") + + +@pytest.fixture(scope="module") +def fitted_plr_model(generate_plr_data): + """Create and fit a simple PLR model for testing.""" + ml_l = LinearRegression() + ml_m = LinearRegression() + + dml_plr = dml.DoubleMLPLR( + generate_plr_data, + ml_l, + ml_m, + n_folds=2, + score="partialling out" + ) + dml_plr.fit() + + return dml_plr + + +@pytest.fixture(scope="module") +def unfitted_plr_model(generate_plr_data): + """Create an unfitted PLR model for testing edge cases.""" + ml_l = LinearRegression() + ml_m = LinearRegression() + + dml_plr = dml.DoubleMLPLR( + generate_plr_data, + ml_l, + ml_m, + n_folds=2, + score="partialling out" + ) + + return dml_plr + + +@pytest.fixture(scope="module") +def generate_irm_data(): + """Generate simple data for IRM model testing.""" + np.random.seed(43) + n = 500 + p = 5 + + # Generate simple data with binary treatment + X = np.random.normal(size=(n, p)) + propensity = 1 / (1 + np.exp(-X[:, 0])) + d = (np.random.uniform(size=n) < propensity).astype(float) + y = 0.5 * d + X[:, 1] + np.random.normal(size=n) + + df = pd.DataFrame( + np.column_stack((X, y, d)), + columns=[f"X{i+1}" for i in range(p)] + ["Y", "D"] + ) + + return dml.DoubleMLData(df, "Y", "D") + + +@pytest.fixture(scope="module") +def fitted_irm_model(generate_irm_data): + """Create and fit a simple IRM model for testing.""" + from 
sklearn.linear_model import LogisticRegression + + ml_g = LinearRegression() + ml_m = LogisticRegression() + + dml_irm = dml.DoubleMLIRM( + generate_irm_data, + ml_g, + ml_m, + n_folds=2, + score="ATE" + ) + dml_irm.fit() + + return dml_irm + + +# ================================================================================== +# Test Coefficient Table Structure +# ================================================================================== + + +@pytest.mark.ci +def test_coef_table_has_required_columns(fitted_plr_model): + """Test that coefficient table has all required columns.""" + coef_table = fitted_plr_model.__maketables_coef_table__ + + # Check DataFrame type + assert isinstance(coef_table, pd.DataFrame) + + # Check required columns exist + assert "b" in coef_table.columns, "Missing required column 'b'" + assert "se" in coef_table.columns, "Missing required column 'se'" + assert "p" in coef_table.columns, "Missing required column 'p'" + + +@pytest.mark.ci +def test_coef_table_has_optional_columns(fitted_plr_model): + """Test that coefficient table has optional columns.""" + coef_table = fitted_plr_model.__maketables_coef_table__ + + # Check optional columns exist + assert "t" in coef_table.columns, "Missing optional column 't'" + assert "ci95l" in coef_table.columns, "Missing optional column 'ci95l'" + assert "ci95u" in coef_table.columns, "Missing optional column 'ci95u'" + + +@pytest.mark.ci +def test_coef_table_index_matches_summary(fitted_plr_model): + """Test that coefficient table index matches summary index.""" + coef_table = fitted_plr_model.__maketables_coef_table__ + summary = fitted_plr_model.summary + + # Index should match treatment variable names + assert list(coef_table.index) == list(summary.index) + + +# ================================================================================== +# Test Coefficient Table Values +# ================================================================================== + + +@pytest.mark.ci +def 
test_coef_table_values_match_model(fitted_plr_model): + """Test that coefficient table values match the model's estimates.""" + coef_table = fitted_plr_model.__maketables_coef_table__ + + # Check coefficient estimates + np.testing.assert_array_almost_equal( + coef_table["b"].values, + fitted_plr_model.coef, + decimal=10, + err_msg="Coefficient estimates don't match" + ) + + # Check standard errors + np.testing.assert_array_almost_equal( + coef_table["se"].values, + fitted_plr_model.se, + decimal=10, + err_msg="Standard errors don't match" + ) + + # Check t-statistics + np.testing.assert_array_almost_equal( + coef_table["t"].values, + fitted_plr_model.t_stat, + decimal=10, + err_msg="T-statistics don't match" + ) + + # Check p-values + np.testing.assert_array_almost_equal( + coef_table["p"].values, + fitted_plr_model.pval, + decimal=10, + err_msg="P-values don't match" + ) + + +@pytest.mark.ci +def test_coef_table_confidence_intervals(fitted_plr_model): + """Test that confidence intervals match confint() method.""" + coef_table = fitted_plr_model.__maketables_coef_table__ + ci = fitted_plr_model.confint(level=0.95) + + # Check lower CI bound + np.testing.assert_array_almost_equal( + coef_table["ci95l"].values, + ci.iloc[:, 0].values, + decimal=10, + err_msg="Lower CI bounds don't match" + ) + + # Check upper CI bound + np.testing.assert_array_almost_equal( + coef_table["ci95u"].values, + ci.iloc[:, 1].values, + decimal=10, + err_msg="Upper CI bounds don't match" + ) + + +# ================================================================================== +# Test Statistics Method +# ================================================================================== + + +@pytest.mark.ci +def test_stat_method_returns_n_obs(fitted_plr_model): + """Test that __maketables_stat__ returns number of observations for key 'N'.""" + n_obs = fitted_plr_model.__maketables_stat__("N") + + assert n_obs is not None, "Should return number of observations" + assert n_obs == 
fitted_plr_model.n_obs, "N should match model's n_obs" + assert isinstance(n_obs, (int, np.integer)), "N should be an integer" + + +@pytest.mark.ci +def test_stat_method_unknown_keys_return_none(fitted_plr_model): + """Test that __maketables_stat__ returns None for unknown keys.""" + # Unknown key should return None + assert fitted_plr_model.__maketables_stat__("unknown_key") is None + + # Empty string should return None + assert fitted_plr_model.__maketables_stat__("") is None + + +@pytest.mark.ci +def test_stat_method_traditional_stats_return_none(fitted_plr_model): + """Test that traditional stats (r2, aic, bic) return None for causal models.""" + # R-squared not applicable for causal inference + assert fitted_plr_model.__maketables_stat__("r2") is None + assert fitted_plr_model.__maketables_stat__("adj_r2") is None + + # Information criteria not applicable + assert fitted_plr_model.__maketables_stat__("aic") is None + assert fitted_plr_model.__maketables_stat__("bic") is None + + # Log-likelihood not applicable + assert fitted_plr_model.__maketables_stat__("ll") is None + + +# ================================================================================== +# Test Dependent Variable +# ================================================================================== + + +@pytest.mark.ci +def test_depvar_returns_string(fitted_plr_model): + """Test that __maketables_depvar__ returns a string.""" + depvar = fitted_plr_model.__maketables_depvar__ + + assert isinstance(depvar, str), "Dependent variable name should be a string" + + +@pytest.mark.ci +def test_depvar_matches_data(fitted_plr_model): + """Test that __maketables_depvar__ matches the actual dependent variable.""" + depvar = fitted_plr_model.__maketables_depvar__ + + assert depvar == "Y", "Dependent variable should be 'Y'" + assert depvar == fitted_plr_model._dml_data.y_col, "Should match data's y_col" + + +# ================================================================================== +# Test 
Default Statistics Keys +# ================================================================================== + + +@pytest.mark.ci +def test_default_stat_keys_returns_list(fitted_plr_model): + """Test that __maketables_default_stat_keys__ returns a list.""" + default_keys = fitted_plr_model.__maketables_default_stat_keys__ + + assert isinstance(default_keys, list), "Default stat keys should be a list" + + +@pytest.mark.ci +def test_default_stat_keys_contains_n(fitted_plr_model): + """Test that default statistics include 'N'.""" + default_keys = fitted_plr_model.__maketables_default_stat_keys__ + + assert "N" in default_keys, "Default statistics should include 'N'" + + +# ================================================================================== +# Test Multiple Model Types +# ================================================================================== + + +@pytest.mark.ci +def test_maketables_works_with_irm_model(fitted_irm_model): + """Test that maketables mixin works with IRM models.""" + # Should have coefficient table + coef_table = fitted_irm_model.__maketables_coef_table__ + assert isinstance(coef_table, pd.DataFrame) + assert "b" in coef_table.columns + assert "se" in coef_table.columns + assert "p" in coef_table.columns + + # Should return N statistic + assert fitted_irm_model.__maketables_stat__("N") == fitted_irm_model.n_obs + + # Should return depvar + assert fitted_irm_model.__maketables_depvar__ == "Y" + + +@pytest.mark.ci +def test_maketables_works_with_pliv_model(generate_plr_data): + """Test that maketables mixin works with PLIV models.""" + from doubleml.plm.datasets import make_pliv_CHS2015 + + # Generate IV data + np.random.seed(44) + data = make_pliv_CHS2015(n_obs=500, dim_x=5, alpha=0.5, dim_z=1, return_type=pd.DataFrame) + + x_cols = [col for col in data.columns if col.startswith("X")] + dml_data = dml.DoubleMLData(data, "y", "d", x_cols, z_cols="Z1") + + ml_l = LinearRegression() + ml_m = LinearRegression() + ml_r = 
LinearRegression() + + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2) + dml_pliv.fit() + + # Should have coefficient table + coef_table = dml_pliv.__maketables_coef_table__ + assert isinstance(coef_table, pd.DataFrame) + assert "b" in coef_table.columns + + # Should return N statistic + assert dml_pliv.__maketables_stat__("N") == dml_pliv.n_obs + + # Should return depvar + assert dml_pliv.__maketables_depvar__ == "y" + + +# ================================================================================== +# Test Edge Cases +# ================================================================================== + + +@pytest.mark.ci +def test_unfitted_model_returns_empty_dataframe(unfitted_plr_model): + """Test that unfitted model returns empty DataFrame with correct columns.""" + coef_table = unfitted_plr_model.__maketables_coef_table__ + + assert isinstance(coef_table, pd.DataFrame), "Should return DataFrame" + assert len(coef_table) == 0, "Should be empty for unfitted model" + + # Should still have correct columns + expected_columns = ["b", "se", "t", "p", "ci95l", "ci95u"] + assert list(coef_table.columns) == expected_columns + + +@pytest.mark.ci +def test_unfitted_model_stat_returns_n(unfitted_plr_model): + """Test that unfitted model can still return N statistic.""" + n_obs = unfitted_plr_model.__maketables_stat__("N") + + # Should still have n_obs even if not fitted + assert n_obs is not None + assert n_obs == unfitted_plr_model.n_obs + + +@pytest.mark.ci +def test_unfitted_model_depvar_works(unfitted_plr_model): + """Test that unfitted model can return dependent variable name.""" + depvar = unfitted_plr_model.__maketables_depvar__ + + assert depvar == "Y" + + +@pytest.mark.ci +def test_multi_treatment_model(): + """Test that maketables works with multiple treatment variables.""" + np.random.seed(45) + n = 500 + p = 5 + + # Generate data with 2 treatments + X = np.random.normal(size=(n, p)) + D1 = 0.5 * X[:, 0] + np.random.normal(size=n) + D2 
= 0.3 * X[:, 1] + np.random.normal(size=n) + Y = 0.5 * D1 + 0.7 * D2 + X[:, 2] + np.random.normal(size=n) + + df = pd.DataFrame( + np.column_stack((X, Y, D1, D2)), + columns=[f"X{i+1}" for i in range(p)] + ["Y", "D1", "D2"] + ) + + dml_data = dml.DoubleMLData(df, "Y", ["D1", "D2"]) + + ml_l = LinearRegression() + ml_m = LinearRegression() + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + dml_plr.fit() + + # Coefficient table should have 2 rows (one per treatment) + coef_table = dml_plr.__maketables_coef_table__ + assert len(coef_table) == 2, "Should have 2 rows for 2 treatments" + assert list(coef_table.index) == ["D1", "D2"], "Index should be treatment names" + + +# ================================================================================== +# Test Integration (Optional - requires maketables) +# ================================================================================== + + +@pytest.mark.ci +def test_integration_with_maketables_if_available(fitted_plr_model): + """Test integration with maketables package if it's installed.""" + pytest.importorskip("maketables", reason="maketables not installed") + + from maketables import ETable + + # Should be able to create a table + try: + table = ETable([fitted_plr_model]) + assert table is not None + + # Should be able to render + text_output = table.render("txt") + assert isinstance(text_output, str) + assert len(text_output) > 0 + + except Exception as e: + pytest.fail(f"MakeTables integration failed: {str(e)}") + + +# ================================================================================== +# Test Comparison with Summary +# ================================================================================== + + +@pytest.mark.ci +def test_coef_table_consistent_with_summary(fitted_plr_model): + """Test that coefficient table is consistent with summary property.""" + coef_table = fitted_plr_model.__maketables_coef_table__ + summary = fitted_plr_model.summary + + # 
Same index + assert list(coef_table.index) == list(summary.index) + + # Coefficients match + np.testing.assert_array_almost_equal( + coef_table["b"].values, + summary["coef"].values, + decimal=10 + ) + + # Standard errors match + np.testing.assert_array_almost_equal( + coef_table["se"].values, + summary["std err"].values, + decimal=10 + ) + + # T-statistics match + np.testing.assert_array_almost_equal( + coef_table["t"].values, + summary["t"].values, + decimal=10 + ) + + # P-values match + np.testing.assert_array_almost_equal( + coef_table["p"].values, + summary["P>|t|"].values, + decimal=10 + ) From 6a890a9a6b0fdb68a43a54e011aed43af0c3d39d Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sat, 27 Dec 2025 22:07:57 +0100 Subject: [PATCH 3/6] add example nb --- maketables_demo.ipynb | 1279 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1279 insertions(+) create mode 100644 maketables_demo.ipynb diff --git a/maketables_demo.ipynb b/maketables_demo.ipynb new file mode 100644 index 00000000..72b68445 --- /dev/null +++ b/maketables_demo.ipynb @@ -0,0 +1,1279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DoubleML + MakeTables Integration Demo\n", + "\n", + "This notebook demonstrates the MakeTables integration with DoubleML, showing how to create publication-ready regression tables with beautiful HTML and LaTeX output." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.linear_model import LinearRegression, LogisticRegression\n", + "import doubleml as dml\n", + "from maketables import ETable\n", + "\n", + "# Set random seed for reproducibility\n", + "np.random.seed(42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 1: Basic PLR Model\n", + "\n", + "Let's start with a simple Partially Linear Regression (PLR) model estimating the effect of education on income." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data shape: (1000, 12)\n", + "\n", + "First few rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X1X2X3X4X5X6X7X8X9X10incomeeducation
00.496714-0.1382640.6476891.523030-0.234153-0.2341371.5792130.767435-0.4694740.542560-0.146270-0.471617
1-0.463418-0.4657300.241962-1.913280-1.724918-0.562288-1.0128310.314247-0.908024-1.412304-0.823369-0.676927
21.465649-0.2257760.067528-1.424748-0.5443830.110923-1.1509940.375698-0.600639-0.2916940.5200760.067710
3-0.6017071.852278-0.013497-1.0577110.822545-1.2208440.208864-1.959670-1.3281860.1968610.5050310.365248
40.7384670.171368-0.115648-0.301104-1.478522-0.719844-0.4606391.0571220.343618-1.7630402.1125351.617822
\n", + "
" + ], + "text/plain": [ + " X1 X2 X3 X4 X5 X6 X7 \\\n", + "0 0.496714 -0.138264 0.647689 1.523030 -0.234153 -0.234137 1.579213 \n", + "1 -0.463418 -0.465730 0.241962 -1.913280 -1.724918 -0.562288 -1.012831 \n", + "2 1.465649 -0.225776 0.067528 -1.424748 -0.544383 0.110923 -1.150994 \n", + "3 -0.601707 1.852278 -0.013497 -1.057711 0.822545 -1.220844 0.208864 \n", + "4 0.738467 0.171368 -0.115648 -0.301104 -1.478522 -0.719844 -0.460639 \n", + "\n", + " X8 X9 X10 income education \n", + "0 0.767435 -0.469474 0.542560 -0.146270 -0.471617 \n", + "1 0.314247 -0.908024 -1.412304 -0.823369 -0.676927 \n", + "2 0.375698 -0.600639 -0.291694 0.520076 0.067710 \n", + "3 -1.959670 -1.328186 0.196861 0.505031 0.365248 \n", + "4 1.057122 0.343618 -1.763040 2.112535 1.617822 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Generate synthetic data\n", + "n = 1000\n", + "p = 10\n", + "\n", + "X = np.random.normal(size=(n, p))\n", + "education = 0.5 * X[:, 0] + 0.3 * X[:, 1] + np.random.normal(size=n)\n", + "income = 0.8 * education + X[:, 2] + 0.5 * X[:, 3] + np.random.normal(size=n)\n", + "\n", + "df = pd.DataFrame(\n", + " np.column_stack((X, income, education)),\n", + " columns=[f\"X{i+1}\" for i in range(p)] + [\"income\", \"education\"]\n", + ")\n", + "\n", + "print(f\"Data shape: {df.shape}\")\n", + "print(f\"\\nFirst few rows:\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DoubleML Summary:\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "education 0.830282 0.032441 25.593745 1.790892e-144 0.766699 0.893865\n" + ] + } + ], + "source": [ + "# Prepare data for DoubleML\n", + "dml_data = dml.DoubleMLData(df, \"income\", \"education\")\n", + "\n", + "# Fit PLR model\n", + "ml_l = LinearRegression()\n", + "ml_m = LinearRegression()\n", + "\n", + "dml_plr = dml.DoubleMLPLR(dml_data, 
ml_l, ml_m, n_folds=5, score=\"partialling out\")\n", + "dml_plr.fit()\n", + "\n", + "# Show standard DoubleML summary\n", + "print(\"DoubleML Summary:\")\n", + "print(dml_plr.summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inspect MakeTables Attributes\n", + "\n", + "The model now has special `__maketables_*` attributes that MakeTables uses to create tables:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Coefficient Table (__maketables_coef_table__):\n", + " b se p t ci95l ci95u\n", + "education 0.830282 0.032441 1.790892e-144 25.593745 0.766699 0.893865\n", + "\n", + "Sample Size: 1000\n", + "Dependent Variable: income\n", + "Default Statistics: ['N']\n" + ] + } + ], + "source": [ + "# Coefficient table\n", + "print(\"Coefficient Table (__maketables_coef_table__):\")\n", + "print(dml_plr.__maketables_coef_table__)\n", + "\n", + "print(f\"\\nSample Size: {dml_plr.__maketables_stat__('N')}\")\n", + "print(f\"Dependent Variable: {dml_plr.__maketables_depvar__}\")\n", + "print(f\"Default Statistics: {dml_plr.__maketables_default_stat_keys__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Table with MakeTables\n", + "\n", + "Now let's create a publication-ready table using MakeTables:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
\n", + " income\n", + "
(1)
coef
education0.830***
(0.032)
stats
Observations1,000
Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. Error)
\n", + "\n", + "
\n", + " " + ], + "text/latex": [ + "\\begin{threeparttable}\n", + "\\begingroup\n", + "\\renewcommand\\cellalign{t}\n", + "\\renewcommand\\arraystretch{1}\n", + "\\setlength{\\tabcolsep}{3pt}\n", + "\\begin{tabularx}{\\linewidth}{@{}>{\\raggedright\\arraybackslash}l>{\\centering\\arraybackslash}X}\n", + "\\toprule\n", + " & \\multicolumn{1}{c}{income} \\\\\n", + "\\cmidrule(lr){2-2}\n", + " & (1) \\\\\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "education & \\makecell{0.830*** \\\\ (0.032)} \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "Observations & 1,000 \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\bottomrule\n", + "\\end{tabularx}\n", + "\\endgroup\n", + "\\noindent\\begin{minipage}{\\linewidth}\\smallskip\\footnotesize\n", + "Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. Error)\\end{minipage}\n", + "\n", + "\\end{threeparttable}" + ], + "text/plain": [ + ".DualOutput at 0x128881f70>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create table\n", + "table = ETable([dml_plr], show_se=True, model_stats=['N'])\n", + "\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LaTeX Table Code:\n", + "\\begin{threeparttable}\n", + "\\begingroup\n", + "\\renewcommand\\cellalign{t}\n", + "\\renewcommand\\arraystretch{1}\n", + "\\setlength{\\tabcolsep}{3pt}\n", + "\\begin{tabularx}{\\linewidth}{@{}>{\\raggedright\\arraybackslash}l>{\\centering\\arraybackslash}X}\n", + "\\toprule\n", + " & \\multicolumn{1}{c}{income} \\\\\n", + "\\cmidrule(lr){2-2}\n", + " & (1) \\\\\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "education & \\makecell{0.830*** \\\\ (0.032)} \\\\\n", + 
"\\addlinespace[0.5ex]\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "Observations & 1,000 \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\bottomrule\n", + "\\end{tabularx}\n", + "\\endgroup\n", + "\\noindent\\begin{minipage}{\\linewidth}\\smallskip\\footnotesize\n", + "Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. Error)\\end{minipage}\n", + "\n", + "\\end{threeparttable}\n" + ] + } + ], + "source": [ + "# Display LaTeX output\n", + "print(\"LaTeX Table Code:\")\n", + "print(table.make('tex'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 2: Comparing Multiple Models\n", + "\n", + "One of the strengths of MakeTables is easily comparing multiple models side-by-side." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model 1 (Education effect):\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "education 0.620113 0.032109 19.31289 4.184858e-83 0.557181 0.683045\n", + "\n", + "Model 2 (Experience effect):\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "experience 0.42868 0.032385 13.237065 5.360313e-40 0.365207 0.492153\n" + ] + } + ], + "source": [ + "# Generate data with two treatments\n", + "np.random.seed(43)\n", + "n = 1000\n", + "p = 8\n", + "\n", + "X = np.random.normal(size=(n, p))\n", + "education = 0.5 * X[:, 0] + 0.2 * X[:, 1] + np.random.normal(size=n)\n", + "experience = 0.3 * X[:, 2] + 0.4 * X[:, 3] + np.random.normal(size=n)\n", + "income = 0.6 * education + 0.4 * experience + X[:, 4] + np.random.normal(size=n)\n", + "\n", + "df2 = pd.DataFrame(\n", + " np.column_stack((X, income, education, experience)),\n", + " columns=[f\"X{i+1}\" for i in range(p)] + [\"income\", \"education\", \"experience\"]\n", + ")\n", + "\n", + "# Fit separate models for each treatment\n", + "dml_data_edu = dml.DoubleMLData(df2, \"income\", \"education\")\n", + 
"dml_data_exp = dml.DoubleMLData(df2, \"income\", \"experience\")\n", + "\n", + "dml_edu = dml.DoubleMLPLR(dml_data_edu, LinearRegression(), LinearRegression(), n_folds=5)\n", + "dml_exp = dml.DoubleMLPLR(dml_data_exp, LinearRegression(), LinearRegression(), n_folds=5)\n", + "\n", + "dml_edu.fit()\n", + "dml_exp.fit()\n", + "\n", + "print(\"Model 1 (Education effect):\")\n", + "print(dml_edu.summary)\n", + "print(\"\\nModel 2 (Experience effect):\")\n", + "print(dml_exp.summary)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
Comparison of Treatment Effects on Income
\n", + "  \n", + " \n", + " income\n", + "
\n", + " Education Model\n", + " \n", + " Experience Model\n", + "
(1)(2)
coef
education0.620***
(0.032)
experience0.429***
(0.032)
stats
Observations1,0001,000
Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. Error)
\n", + "\n", + "
\n", + " " + ], + "text/latex": [ + "\\begin{table}[htbp]\n", + "\\centering\n", + "\\caption{Comparison of Treatment Effects on Income}\n", + "\\smallskip\n", + "\\begin{threeparttable}\n", + "\\begingroup\n", + "\\renewcommand\\cellalign{t}\n", + "\\renewcommand\\arraystretch{1}\n", + "\\setlength{\\tabcolsep}{3pt}\n", + "\\begin{tabularx}{\\linewidth}{@{}>{\\raggedright\\arraybackslash}l>{\\centering\\arraybackslash}X>{\\centering\\arraybackslash}X}\n", + "\\toprule\n", + " & \\multicolumn{2}{c}{income} \\\\\n", + "\\cmidrule(lr){2-3}\n", + " & \\multicolumn{1}{c}{Education Model} & \\multicolumn{1}{c}{Experience Model} \\\\\n", + "\\cmidrule(lr){2-2} \\cmidrule(lr){3-3}\n", + " & (1) & (2) \\\\\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "education & \\makecell{0.620*** \\\\ (0.032)} & \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\addlinespace[0.5ex]\n", + "experience & & \\makecell{0.429*** \\\\ (0.032)} \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "Observations & 1,000 & 1,000 \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\bottomrule\n", + "\\end{tabularx}\n", + "\\endgroup\n", + "\\noindent\\begin{minipage}{\\linewidth}\\smallskip\\footnotesize\n", + "Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. 
Error)\\end{minipage}\n", + "\n", + "\\end{threeparttable}\n", + "\\end{table}" + ], + "text/plain": [ + ".DualOutput at 0x11f511250>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create comparison table\n", + "comparison_table = ETable(\n", + " [dml_edu, dml_exp],\n", + " show_se=True,\n", + " model_stats=['N'],\n", + " model_heads=['Education Model', 'Experience Model'],\n", + " caption='Comparison of Treatment Effects on Income'\n", + ")\n", + "\n", + "comparison_table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 3: Binary Treatment (IRM Model)\n", + "\n", + "Let's demonstrate with a binary treatment using the Interactive Regression Model (IRM)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "IRM Summary:\n", + " coef std err t P>|t| 2.5 % 97.5 %\n", + "treatment 0.635458 0.070924 8.959683 3.256131e-19 0.496449 0.774467\n" + ] + } + ], + "source": [ + "# Generate data with binary treatment\n", + "np.random.seed(44)\n", + "n = 1000\n", + "p = 8\n", + "\n", + "X = np.random.normal(size=(n, p))\n", + "propensity = 1 / (1 + np.exp(-0.5 * X[:, 0] - 0.3 * X[:, 1]))\n", + "treatment = (np.random.uniform(size=n) < propensity).astype(float)\n", + "outcome = 0.7 * treatment + X[:, 2] + 0.5 * X[:, 3] + np.random.normal(size=n)\n", + "\n", + "df_irm = pd.DataFrame(\n", + " np.column_stack((X, outcome, treatment)),\n", + " columns=[f\"X{i+1}\" for i in range(p)] + [\"outcome\", \"treatment\"]\n", + ")\n", + "\n", + "# Fit IRM model\n", + "dml_data_irm = dml.DoubleMLData(df_irm, \"outcome\", \"treatment\")\n", + "\n", + "dml_irm = dml.DoubleMLIRM(\n", + " dml_data_irm,\n", + " LinearRegression(),\n", + " LogisticRegression(max_iter=1000),\n", + " n_folds=5,\n", + 
" score=\"ATE\"\n", + ")\n", + "dml_irm.fit()\n", + "\n", + "print(\"IRM Summary:\")\n", + "print(dml_irm.summary)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
Average Treatment Effect (ATE) Estimation
\n", + " outcome\n", + "
(1)
coef
treatment0.635***
(0.071)
stats
Observations1,000
Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. Error)
\n", + "\n", + "
\n", + " " + ], + "text/latex": [ + "\\begin{table}[htbp]\n", + "\\centering\n", + "\\caption{Average Treatment Effect (ATE) Estimation}\n", + "\\smallskip\n", + "\\begin{threeparttable}\n", + "\\begingroup\n", + "\\renewcommand\\cellalign{t}\n", + "\\renewcommand\\arraystretch{1}\n", + "\\setlength{\\tabcolsep}{3pt}\n", + "\\begin{tabularx}{\\linewidth}{@{}>{\\raggedright\\arraybackslash}l>{\\centering\\arraybackslash}X}\n", + "\\toprule\n", + " & \\multicolumn{1}{c}{outcome} \\\\\n", + "\\cmidrule(lr){2-2}\n", + " & (1) \\\\\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "treatment & \\makecell{0.635*** \\\\ (0.071)} \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "Observations & 1,000 \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\bottomrule\n", + "\\end{tabularx}\n", + "\\endgroup\n", + "\\noindent\\begin{minipage}{\\linewidth}\\smallskip\\footnotesize\n", + "Significance levels: * p < 0.1, ** p < 0.05, *** p < 0.01. Format of coefficient cell: Coefficient (Std. Error)\\end{minipage}\n", + "\n", + "\\end{threeparttable}\n", + "\\end{table}" + ], + "text/plain": [ + ".DualOutput at 0x1296bdc10>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create table for IRM model\n", + "irm_table = ETable(\n", + " [dml_irm],\n", + " show_se=True,\n", + " model_stats=['N'],\n", + " caption='Average Treatment Effect (ATE) Estimation'\n", + ")\n", + "\n", + "irm_table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 4: Customized Table Formatting\n", + "\n", + "MakeTables allows extensive customization of table appearance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create table with custom formatting\n", + "custom_table = ETable(\n", + " [dml_plr],\n", + " coef_fmt=\"b:.3f \\n [ci95l:.3f, ci95u:.3f]\", # Show CI instead of SE\n", + " model_stats=['N'],\n", + " caption='Custom Formatted Table with Confidence Intervals',\n", + " notes='95% confidence intervals shown in brackets.'\n", + ")\n", + "\n", + "# Display HTML with custom styling\n", + "display(custom_table.make('html', gt_style={'table_font_size': '14px'}))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "
Table with t-statistics
\n", + " income\n", + "
(1)
coef
education0.8303
(25.59)
stats
Observations1,000
t-statistics shown in parentheses.
\n", + "\n", + "
\n", + " " + ], + "text/latex": [ + "\\begin{table}[htbp]\n", + "\\centering\n", + "\\caption{Table with t-statistics}\n", + "\\smallskip\n", + "\\begin{threeparttable}\n", + "\\begingroup\n", + "\\renewcommand\\cellalign{t}\n", + "\\renewcommand\\arraystretch{1}\n", + "\\setlength{\\tabcolsep}{3pt}\n", + "\\begin{tabularx}{\\linewidth}{@{}>{\\raggedright\\arraybackslash}l>{\\centering\\arraybackslash}X}\n", + "\\toprule\n", + " & \\multicolumn{1}{c}{income} \\\\\n", + "\\cmidrule(lr){2-2}\n", + " & (1) \\\\\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "education & \\makecell{0.8303 \\\\ (25.59)} \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\midrule\n", + "\\addlinespace[1ex]\n", + "Observations & 1,000 \\\\\n", + "\\addlinespace[0.5ex]\n", + "\\bottomrule\n", + "\\end{tabularx}\n", + "\\endgroup\n", + "\\noindent\\begin{minipage}{\\linewidth}\\smallskip\\footnotesize\n", + "t-statistics shown in parentheses.\\end{minipage}\n", + "\n", + "\\end{threeparttable}\n", + "\\end{table}" + ], + "text/plain": [ + ".DualOutput at 0x128881f70>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Another example: showing t-statistics\n", + "t_stat_table = ETable(\n", + " [dml_plr],\n", + " coef_fmt=\"b:.4f \\n (t:.2f)\", # Show t-stat instead of SE\n", + " model_stats=['N'],\n", + " caption='Table with t-statistics',\n", + " notes='t-statistics shown in parentheses.'\n", + ")\n", + "\n", + "t_stat_table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 5: Multiple Treatments in One Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Fit model with multiple treatments\n", + "dml_data_multi = dml.DoubleMLData(df2, \"income\", [\"education\", \"experience\"])\n", + "\n", + "dml_multi = dml.DoubleMLPLR(\n", + " 
dml_data_multi,\n", + " LinearRegression(),\n", + " LinearRegression(),\n", + " n_folds=5\n", + ")\n", + "dml_multi.fit()\n", + "\n", + "print(\"Multi-treatment Summary:\")\n", + "print(dml_multi.summary)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create table\n", + "multi_table = ETable(\n", + " [dml_multi],\n", + " show_se=True,\n", + " model_stats=['N'],\n", + " caption='Joint Estimation of Multiple Treatment Effects',\n", + " labels={'education': 'Years of Education', 'experience': 'Years of Experience'}\n", + ")\n", + "\n", + "multi_table.make('html')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving Tables\n", + "\n", + "You can save tables to files for use in your papers/presentations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Save as LaTeX\ntable.save('tex', 'table_results.tex')\nprint(\"✅ Saved to table_results.tex\")\n\n# Save as HTML\ntable.save('html', 'table_results.html')\nprint(\"✅ Saved to table_results.html\")\n\n# Save as Word document\ntable.save('docx', 'table_results.docx')\nprint(\"✅ Saved to table_results.docx\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This notebook demonstrated:\n", + "\n", + "1. **Basic Integration**: DoubleML models automatically work with MakeTables\n", + "2. **Model Comparison**: Easy side-by-side comparison of multiple models\n", + "3. **Different Model Types**: Works with PLR, IRM, and other DoubleML models\n", + "4. **Customization**: Flexible formatting options for coefficients and statistics\n", + "5. **Multiple Treatments**: Handles models with multiple treatment variables\n", + "6. 
**Export Options**: Save to LaTeX, HTML, Word, or Typst formats\n", + "\n", + "### Key Advantages\n", + "\n", + "- **Zero Coupling**: DoubleML doesn't depend on MakeTables\n", + "- **Automatic Detection**: MakeTables finds the special attributes automatically\n", + "- **Publication Ready**: Beautiful tables suitable for papers and presentations\n", + "- **Flexible**: Extensive customization options available" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file From a5289558131a08f66878d145061c935218a6e42d Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 28 Dec 2025 09:19:47 +0100 Subject: [PATCH 4/6] delete tests --- doubleml/utils/tests/test_maketables_mixin.py | 490 ------------------ 1 file changed, 490 deletions(-) delete mode 100644 doubleml/utils/tests/test_maketables_mixin.py diff --git a/doubleml/utils/tests/test_maketables_mixin.py b/doubleml/utils/tests/test_maketables_mixin.py deleted file mode 100644 index 870196c8..00000000 --- a/doubleml/utils/tests/test_maketables_mixin.py +++ /dev/null @@ -1,490 +0,0 @@ -""" -Tests for MakeTables Mixin. - -This module tests the MakeTables plug-in support for DoubleML models, -verifying that the mixin correctly provides coefficient tables, statistics, -and dependent variable names for use with the MakeTables package. 
-""" - -import numpy as np -import pandas as pd -import pytest -from sklearn.linear_model import LinearRegression, Lasso - -import doubleml as dml - - -@pytest.fixture(scope="module") -def generate_plr_data(): - """Generate simple data for PLR model testing.""" - np.random.seed(42) - n = 500 - p = 5 - theta = 0.5 - - # Generate simple data - X = np.random.normal(size=(n, p)) - d = 0.5 * X[:, 0] + np.random.normal(size=n) - y = theta * d + X[:, 1] + np.random.normal(size=n) - - df = pd.DataFrame( - np.column_stack((X, y, d)), - columns=[f"X{i+1}" for i in range(p)] + ["Y", "D"] - ) - - return dml.DoubleMLData(df, "Y", "D") - - -@pytest.fixture(scope="module") -def fitted_plr_model(generate_plr_data): - """Create and fit a simple PLR model for testing.""" - ml_l = LinearRegression() - ml_m = LinearRegression() - - dml_plr = dml.DoubleMLPLR( - generate_plr_data, - ml_l, - ml_m, - n_folds=2, - score="partialling out" - ) - dml_plr.fit() - - return dml_plr - - -@pytest.fixture(scope="module") -def unfitted_plr_model(generate_plr_data): - """Create an unfitted PLR model for testing edge cases.""" - ml_l = LinearRegression() - ml_m = LinearRegression() - - dml_plr = dml.DoubleMLPLR( - generate_plr_data, - ml_l, - ml_m, - n_folds=2, - score="partialling out" - ) - - return dml_plr - - -@pytest.fixture(scope="module") -def generate_irm_data(): - """Generate simple data for IRM model testing.""" - np.random.seed(43) - n = 500 - p = 5 - - # Generate simple data with binary treatment - X = np.random.normal(size=(n, p)) - propensity = 1 / (1 + np.exp(-X[:, 0])) - d = (np.random.uniform(size=n) < propensity).astype(float) - y = 0.5 * d + X[:, 1] + np.random.normal(size=n) - - df = pd.DataFrame( - np.column_stack((X, y, d)), - columns=[f"X{i+1}" for i in range(p)] + ["Y", "D"] - ) - - return dml.DoubleMLData(df, "Y", "D") - - -@pytest.fixture(scope="module") -def fitted_irm_model(generate_irm_data): - """Create and fit a simple IRM model for testing.""" - from 
sklearn.linear_model import LogisticRegression - - ml_g = LinearRegression() - ml_m = LogisticRegression() - - dml_irm = dml.DoubleMLIRM( - generate_irm_data, - ml_g, - ml_m, - n_folds=2, - score="ATE" - ) - dml_irm.fit() - - return dml_irm - - -# ================================================================================== -# Test Coefficient Table Structure -# ================================================================================== - - -@pytest.mark.ci -def test_coef_table_has_required_columns(fitted_plr_model): - """Test that coefficient table has all required columns.""" - coef_table = fitted_plr_model.__maketables_coef_table__ - - # Check DataFrame type - assert isinstance(coef_table, pd.DataFrame) - - # Check required columns exist - assert "b" in coef_table.columns, "Missing required column 'b'" - assert "se" in coef_table.columns, "Missing required column 'se'" - assert "p" in coef_table.columns, "Missing required column 'p'" - - -@pytest.mark.ci -def test_coef_table_has_optional_columns(fitted_plr_model): - """Test that coefficient table has optional columns.""" - coef_table = fitted_plr_model.__maketables_coef_table__ - - # Check optional columns exist - assert "t" in coef_table.columns, "Missing optional column 't'" - assert "ci95l" in coef_table.columns, "Missing optional column 'ci95l'" - assert "ci95u" in coef_table.columns, "Missing optional column 'ci95u'" - - -@pytest.mark.ci -def test_coef_table_index_matches_summary(fitted_plr_model): - """Test that coefficient table index matches summary index.""" - coef_table = fitted_plr_model.__maketables_coef_table__ - summary = fitted_plr_model.summary - - # Index should match treatment variable names - assert list(coef_table.index) == list(summary.index) - - -# ================================================================================== -# Test Coefficient Table Values -# ================================================================================== - - -@pytest.mark.ci -def 
test_coef_table_values_match_model(fitted_plr_model): - """Test that coefficient table values match the model's estimates.""" - coef_table = fitted_plr_model.__maketables_coef_table__ - - # Check coefficient estimates - np.testing.assert_array_almost_equal( - coef_table["b"].values, - fitted_plr_model.coef, - decimal=10, - err_msg="Coefficient estimates don't match" - ) - - # Check standard errors - np.testing.assert_array_almost_equal( - coef_table["se"].values, - fitted_plr_model.se, - decimal=10, - err_msg="Standard errors don't match" - ) - - # Check t-statistics - np.testing.assert_array_almost_equal( - coef_table["t"].values, - fitted_plr_model.t_stat, - decimal=10, - err_msg="T-statistics don't match" - ) - - # Check p-values - np.testing.assert_array_almost_equal( - coef_table["p"].values, - fitted_plr_model.pval, - decimal=10, - err_msg="P-values don't match" - ) - - -@pytest.mark.ci -def test_coef_table_confidence_intervals(fitted_plr_model): - """Test that confidence intervals match confint() method.""" - coef_table = fitted_plr_model.__maketables_coef_table__ - ci = fitted_plr_model.confint(level=0.95) - - # Check lower CI bound - np.testing.assert_array_almost_equal( - coef_table["ci95l"].values, - ci.iloc[:, 0].values, - decimal=10, - err_msg="Lower CI bounds don't match" - ) - - # Check upper CI bound - np.testing.assert_array_almost_equal( - coef_table["ci95u"].values, - ci.iloc[:, 1].values, - decimal=10, - err_msg="Upper CI bounds don't match" - ) - - -# ================================================================================== -# Test Statistics Method -# ================================================================================== - - -@pytest.mark.ci -def test_stat_method_returns_n_obs(fitted_plr_model): - """Test that __maketables_stat__ returns number of observations for key 'N'.""" - n_obs = fitted_plr_model.__maketables_stat__("N") - - assert n_obs is not None, "Should return number of observations" - assert n_obs == 
fitted_plr_model.n_obs, "N should match model's n_obs" - assert isinstance(n_obs, (int, np.integer)), "N should be an integer" - - -@pytest.mark.ci -def test_stat_method_unknown_keys_return_none(fitted_plr_model): - """Test that __maketables_stat__ returns None for unknown keys.""" - # Unknown key should return None - assert fitted_plr_model.__maketables_stat__("unknown_key") is None - - # Empty string should return None - assert fitted_plr_model.__maketables_stat__("") is None - - -@pytest.mark.ci -def test_stat_method_traditional_stats_return_none(fitted_plr_model): - """Test that traditional stats (r2, aic, bic) return None for causal models.""" - # R-squared not applicable for causal inference - assert fitted_plr_model.__maketables_stat__("r2") is None - assert fitted_plr_model.__maketables_stat__("adj_r2") is None - - # Information criteria not applicable - assert fitted_plr_model.__maketables_stat__("aic") is None - assert fitted_plr_model.__maketables_stat__("bic") is None - - # Log-likelihood not applicable - assert fitted_plr_model.__maketables_stat__("ll") is None - - -# ================================================================================== -# Test Dependent Variable -# ================================================================================== - - -@pytest.mark.ci -def test_depvar_returns_string(fitted_plr_model): - """Test that __maketables_depvar__ returns a string.""" - depvar = fitted_plr_model.__maketables_depvar__ - - assert isinstance(depvar, str), "Dependent variable name should be a string" - - -@pytest.mark.ci -def test_depvar_matches_data(fitted_plr_model): - """Test that __maketables_depvar__ matches the actual dependent variable.""" - depvar = fitted_plr_model.__maketables_depvar__ - - assert depvar == "Y", "Dependent variable should be 'Y'" - assert depvar == fitted_plr_model._dml_data.y_col, "Should match data's y_col" - - -# ================================================================================== -# Test 
Default Statistics Keys -# ================================================================================== - - -@pytest.mark.ci -def test_default_stat_keys_returns_list(fitted_plr_model): - """Test that __maketables_default_stat_keys__ returns a list.""" - default_keys = fitted_plr_model.__maketables_default_stat_keys__ - - assert isinstance(default_keys, list), "Default stat keys should be a list" - - -@pytest.mark.ci -def test_default_stat_keys_contains_n(fitted_plr_model): - """Test that default statistics include 'N'.""" - default_keys = fitted_plr_model.__maketables_default_stat_keys__ - - assert "N" in default_keys, "Default statistics should include 'N'" - - -# ================================================================================== -# Test Multiple Model Types -# ================================================================================== - - -@pytest.mark.ci -def test_maketables_works_with_irm_model(fitted_irm_model): - """Test that maketables mixin works with IRM models.""" - # Should have coefficient table - coef_table = fitted_irm_model.__maketables_coef_table__ - assert isinstance(coef_table, pd.DataFrame) - assert "b" in coef_table.columns - assert "se" in coef_table.columns - assert "p" in coef_table.columns - - # Should return N statistic - assert fitted_irm_model.__maketables_stat__("N") == fitted_irm_model.n_obs - - # Should return depvar - assert fitted_irm_model.__maketables_depvar__ == "Y" - - -@pytest.mark.ci -def test_maketables_works_with_pliv_model(generate_plr_data): - """Test that maketables mixin works with PLIV models.""" - from doubleml.plm.datasets import make_pliv_CHS2015 - - # Generate IV data - np.random.seed(44) - data = make_pliv_CHS2015(n_obs=500, dim_x=5, alpha=0.5, dim_z=1, return_type=pd.DataFrame) - - x_cols = [col for col in data.columns if col.startswith("X")] - dml_data = dml.DoubleMLData(data, "y", "d", x_cols, z_cols="Z1") - - ml_l = LinearRegression() - ml_m = LinearRegression() - ml_r = 
LinearRegression() - - dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2) - dml_pliv.fit() - - # Should have coefficient table - coef_table = dml_pliv.__maketables_coef_table__ - assert isinstance(coef_table, pd.DataFrame) - assert "b" in coef_table.columns - - # Should return N statistic - assert dml_pliv.__maketables_stat__("N") == dml_pliv.n_obs - - # Should return depvar - assert dml_pliv.__maketables_depvar__ == "y" - - -# ================================================================================== -# Test Edge Cases -# ================================================================================== - - -@pytest.mark.ci -def test_unfitted_model_returns_empty_dataframe(unfitted_plr_model): - """Test that unfitted model returns empty DataFrame with correct columns.""" - coef_table = unfitted_plr_model.__maketables_coef_table__ - - assert isinstance(coef_table, pd.DataFrame), "Should return DataFrame" - assert len(coef_table) == 0, "Should be empty for unfitted model" - - # Should still have correct columns - expected_columns = ["b", "se", "t", "p", "ci95l", "ci95u"] - assert list(coef_table.columns) == expected_columns - - -@pytest.mark.ci -def test_unfitted_model_stat_returns_n(unfitted_plr_model): - """Test that unfitted model can still return N statistic.""" - n_obs = unfitted_plr_model.__maketables_stat__("N") - - # Should still have n_obs even if not fitted - assert n_obs is not None - assert n_obs == unfitted_plr_model.n_obs - - -@pytest.mark.ci -def test_unfitted_model_depvar_works(unfitted_plr_model): - """Test that unfitted model can return dependent variable name.""" - depvar = unfitted_plr_model.__maketables_depvar__ - - assert depvar == "Y" - - -@pytest.mark.ci -def test_multi_treatment_model(): - """Test that maketables works with multiple treatment variables.""" - np.random.seed(45) - n = 500 - p = 5 - - # Generate data with 2 treatments - X = np.random.normal(size=(n, p)) - D1 = 0.5 * X[:, 0] + np.random.normal(size=n) - D2 
= 0.3 * X[:, 1] + np.random.normal(size=n) - Y = 0.5 * D1 + 0.7 * D2 + X[:, 2] + np.random.normal(size=n) - - df = pd.DataFrame( - np.column_stack((X, Y, D1, D2)), - columns=[f"X{i+1}" for i in range(p)] + ["Y", "D1", "D2"] - ) - - dml_data = dml.DoubleMLData(df, "Y", ["D1", "D2"]) - - ml_l = LinearRegression() - ml_m = LinearRegression() - - dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") - dml_plr.fit() - - # Coefficient table should have 2 rows (one per treatment) - coef_table = dml_plr.__maketables_coef_table__ - assert len(coef_table) == 2, "Should have 2 rows for 2 treatments" - assert list(coef_table.index) == ["D1", "D2"], "Index should be treatment names" - - -# ================================================================================== -# Test Integration (Optional - requires maketables) -# ================================================================================== - - -@pytest.mark.ci -def test_integration_with_maketables_if_available(fitted_plr_model): - """Test integration with maketables package if it's installed.""" - pytest.importorskip("maketables", reason="maketables not installed") - - from maketables import ETable - - # Should be able to create a table - try: - table = ETable([fitted_plr_model]) - assert table is not None - - # Should be able to render - text_output = table.render("txt") - assert isinstance(text_output, str) - assert len(text_output) > 0 - - except Exception as e: - pytest.fail(f"MakeTables integration failed: {str(e)}") - - -# ================================================================================== -# Test Comparison with Summary -# ================================================================================== - - -@pytest.mark.ci -def test_coef_table_consistent_with_summary(fitted_plr_model): - """Test that coefficient table is consistent with summary property.""" - coef_table = fitted_plr_model.__maketables_coef_table__ - summary = fitted_plr_model.summary - - # 
Same index - assert list(coef_table.index) == list(summary.index) - - # Coefficients match - np.testing.assert_array_almost_equal( - coef_table["b"].values, - summary["coef"].values, - decimal=10 - ) - - # Standard errors match - np.testing.assert_array_almost_equal( - coef_table["se"].values, - summary["std err"].values, - decimal=10 - ) - - # T-statistics match - np.testing.assert_array_almost_equal( - coef_table["t"].values, - summary["t"].values, - decimal=10 - ) - - # P-values match - np.testing.assert_array_almost_equal( - coef_table["p"].values, - summary["P>|t|"].values, - decimal=10 - ) From 6bc2d8f42c67e12cf938d2861f883405813afde8 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:55:14 +0100 Subject: [PATCH 5/6] ruff formatting --- doubleml/double_ml.py | 2 +- maketables_demo.ipynb | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 66a9eb80..efb0a868 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -12,11 +12,11 @@ from doubleml.double_ml_framework import DoubleMLCore, DoubleMLFramework from doubleml.double_ml_sampling_mixins import SampleSplittingMixin from doubleml.utils._checks import _check_external_predictions -from doubleml.utils.maketables_mixin import MakeTablesMixin from doubleml.utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est from doubleml.utils._sensitivity import _compute_sensitivity_bias from doubleml.utils._tune_optuna import OPTUNA_GLOBAL_SETTING_KEYS, TUNE_ML_MODELS_DOC, resolve_optuna_cv from doubleml.utils.gain_statistics import gain_statistics +from doubleml.utils.maketables_mixin import MakeTablesMixin _implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLSSMData", "DoubleMLRDDData"] diff --git a/maketables_demo.ipynb b/maketables_demo.ipynb index 72b68445..3774ac98 100644 
--- a/maketables_demo.ipynb +++ b/maketables_demo.ipynb @@ -24,9 +24,10 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", + "from maketables import ETable\n", "from sklearn.linear_model import LinearRegression, LogisticRegression\n", + "\n", "import doubleml as dml\n", - "from maketables import ETable\n", "\n", "# Set random seed for reproducibility\n", "np.random.seed(42)" @@ -206,7 +207,7 @@ ")\n", "\n", "print(f\"Data shape: {df.shape}\")\n", - "print(f\"\\nFirst few rows:\")\n", + "print(\"\\nFirst few rows:\")\n", "df.head()" ] }, @@ -1229,7 +1230,19 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Save as LaTeX\ntable.save('tex', 'table_results.tex')\nprint(\"✅ Saved to table_results.tex\")\n\n# Save as HTML\ntable.save('html', 'table_results.html')\nprint(\"✅ Saved to table_results.html\")\n\n# Save as Word document\ntable.save('docx', 'table_results.docx')\nprint(\"✅ Saved to table_results.docx\")" + "source": [ + "# Save as LaTeX\n", + "table.save('tex', 'table_results.tex')\n", + "print(\"✅ Saved to table_results.tex\")\n", + "\n", + "# Save as HTML\n", + "table.save('html', 'table_results.html')\n", + "print(\"✅ Saved to table_results.html\")\n", + "\n", + "# Save as Word document\n", + "table.save('docx', 'table_results.docx')\n", + "print(\"✅ Saved to table_results.docx\")" + ] }, { "cell_type": "markdown", @@ -1276,4 +1289,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} From 457f2634a1bcc2a836f20aae5f0d659ffc6a200b Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 8 Jan 2026 14:18:35 +0100 Subject: [PATCH 6/6] update docstring examples for maketables --- doubleml/utils/maketables_mixin.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/doubleml/utils/maketables_mixin.py b/doubleml/utils/maketables_mixin.py index 5eb888bf..a26695e7 100644 --- a/doubleml/utils/maketables_mixin.py +++ 
b/doubleml/utils/maketables_mixin.py @@ -46,7 +46,14 @@ class MakeTablesMixin: -------- >>> from doubleml import DoubleMLPLR >>> # After fitting a DoubleML model - >>> dml_plr.fit() + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 + >>> from sklearn.ensemble import RandomForestRegressor + >>> from sklearn.base import clone + >>> np.random.seed(3141) + >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20) + >>> dml_plr = DoubleMLPLR(obj_dml_data, ml_g, ml_m).fit() >>> # Access maketables attributes >>> coef_table = dml_plr.__maketables_coef_table__ >>> n_obs = dml_plr.__maketables_stat__('N') @@ -134,10 +141,6 @@ def __maketables_stat__(self, key: str): statistics like R-squared, AIC, and BIC are not applicable and will return None. Currently only 'N' (number of observations) is supported. - Examples - -------- - >>> n_obs = dml_model.__maketables_stat__('N') - >>> r2 = dml_model.__maketables_stat__('r2') # Returns None """ stats_map = { "N": self.n_obs if hasattr(self, "n_obs") else None, @@ -159,11 +162,6 @@ def __maketables_depvar__(self) -> str: Retrieves the dependent variable name from the DoubleMLData object's y_col attribute. Falls back to "Y" if the attribute is not available. - Examples - -------- - >>> depvar = dml_model.__maketables_depvar__ - >>> print(depvar) - 'Y' """ if hasattr(self, "_dml_data") and hasattr(self._dml_data, "y_col"): return self._dml_data.y_col @@ -186,10 +184,5 @@ def __maketables_default_stat_keys__(self) -> list: to include in the table by default. Users can override this when calling ETable() by specifying the model_stats parameter. - Examples - -------- - >>> default_stats = dml_model.__maketables_default_stat_keys__ - >>> print(default_stats) - ['N'] """ return ["N"]