Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,8 @@ MANIFEST

# Setuptools SCM
doubleml/_version.py

# Virtual environment
.venv
venv/
env/
3 changes: 2 additions & 1 deletion doubleml/double_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
from doubleml.utils._sensitivity import _compute_sensitivity_bias
from doubleml.utils._tune_optuna import OPTUNA_GLOBAL_SETTING_KEYS, TUNE_ML_MODELS_DOC, resolve_optuna_cv
from doubleml.utils.gain_statistics import gain_statistics
from doubleml.utils.maketables_mixin import MakeTablesMixin

_implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLSSMData", "DoubleMLRDDData"]


class DoubleML(SampleSplittingMixin, ABC):
class DoubleML(MakeTablesMixin, SampleSplittingMixin, ABC):
"""Double Machine Learning."""

def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting, double_sample_splitting=False):
Expand Down
188 changes: 188 additions & 0 deletions doubleml/utils/maketables_mixin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""
MakeTables Mixin for DoubleML Models.

This module provides a mixin class that adds MakeTables plug-in support to DoubleML models.
The mixin implements the three required attributes for MakeTables compatibility:
- __maketables_coef_table__: Returns coefficient table as DataFrame
- __maketables_stat__: Returns model statistics by key
- __maketables_depvar__: Returns dependent variable name

This enables zero-coupling integration with MakeTables - DoubleML never imports maketables,
but models automatically work with it when users have maketables installed.
"""

import numpy as np
import pandas as pd


class MakeTablesMixin:
    """
    Mixin class for MakeTables plug-in support.

    The mixin exposes a small set of specially named attributes on the host
    model. MakeTables is never imported here; the integration relies purely on
    these attributes being present (duck typing).

    The host class is expected to provide ``coef``, ``se``, ``pval``,
    ``t_stat``, ``confint(level=...)`` and ``summary``; ``n_obs`` and
    ``_dml_data.y_col`` are used when available.

    Attributes
    ----------
    __maketables_coef_table__ : pd.DataFrame (property)
        Coefficient table with columns 'b' (estimates), 'se' (standard errors),
        'p' (p-values), 't' (t-statistics), 'ci95l', 'ci95u' (95% CI bounds).

    __maketables_depvar__ : str (property)
        Name of the dependent variable.

    __maketables_default_stat_keys__ : list (property)
        Default statistics to display in tables.

    Methods
    -------
    __maketables_stat__(key)
        Return model statistic by key (e.g., 'N' for number of observations).

    Examples
    --------
    >>> import numpy as np
    >>> from doubleml import DoubleMLPLR
    >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018
    >>> from sklearn.ensemble import RandomForestRegressor
    >>> np.random.seed(3141)
    >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
    >>> ml_m = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
    >>> obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20)
    >>> dml_plr = DoubleMLPLR(obj_dml_data, ml_g, ml_m).fit()
    >>> # Access the maketables attributes on the fitted model
    >>> coef_table = dml_plr.__maketables_coef_table__
    >>> n_obs = dml_plr.__maketables_stat__('N')
    >>> depvar = dml_plr.__maketables_depvar__
    """

    @property
    def __maketables_coef_table__(self) -> pd.DataFrame:
        """
        Return the coefficient table in the column layout MakeTables expects.

        Returns
        -------
        pd.DataFrame
            Coefficient table with columns, in this order:
            - 'b': coefficient estimates (required)
            - 'se': standard errors (required)
            - 'p': p-values (required)
            - 't': t-statistics (optional)
            - 'ci95l': lower 95% confidence interval bound (optional)
            - 'ci95u': upper 95% confidence interval bound (optional)

        Notes
        -----
        - An empty DataFrame with the same columns (and the same column order)
          is returned when ``coef`` is missing, ``None`` or entirely NaN.
        - When ``summary`` is available and non-empty, its index is used as the
          index of the returned table.
        """
        # Single source of truth for the column layout so that the empty and
        # the populated table can never disagree on names or order.
        columns = ["b", "se", "p", "t", "ci95l", "ci95u"]

        # Missing, None or all-NaN coefficients: nothing to report.
        coef = getattr(self, "coef", None)
        if coef is None or np.isnan(coef).all():
            return pd.DataFrame(columns=columns)

        # First column is taken as the lower bound, second as the upper bound.
        ci = self.confint(level=0.95)

        coef_table = pd.DataFrame(
            {
                "b": coef,
                "se": self.se,
                "p": self.pval,
                "t": self.t_stat,
                "ci95l": ci.iloc[:, 0],
                "ci95u": ci.iloc[:, 1],
            },
            columns=columns,
        )

        # Read ``summary`` once; on the host class it may be a computed property.
        summary = getattr(self, "summary", None)
        if summary is not None and len(summary) > 0:
            coef_table.index = summary.index

        return coef_table

    def __maketables_stat__(self, key: str):
        """
        Return model statistic by key.

        Parameters
        ----------
        key : str
            The statistic key to retrieve. Only 'N' (number of observations)
            is supported; any other key (e.g. 'r2', 'adj_r2', 'aic', 'bic',
            'll') yields ``None``.

        Returns
        -------
        float, int, or None
            The value of ``n_obs`` for key 'N' (``None`` if the host object has
            no such attribute), otherwise ``None``.
        """
        if key == "N":
            return getattr(self, "n_obs", None)
        return None

    @property
    def __maketables_depvar__(self) -> str:
        """
        Return the name of the dependent variable.

        Returns
        -------
        str
            ``self._dml_data.y_col`` when both ``_dml_data`` and its ``y_col``
            attribute exist, otherwise the fallback "Y".
        """
        dml_data = getattr(self, "_dml_data", None)
        # getattr on a missing/None data object falls through to the default.
        return getattr(dml_data, "y_col", "Y")

    @property
    def __maketables_default_stat_keys__(self) -> list:
        """
        Return default statistics to display in MakeTables output.

        Returns
        -------
        list
            List of statistic keys to display by default; always ``['N']``.
        """
        return ["N"]
Loading
Loading