Skip to content

Commit

Permalink
added JOSS draft
Browse files Browse the repository at this point in the history
  • Loading branch information
rajitachandak committed Aug 26, 2024
1 parent fc0912e commit a0c0160
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 0 deletions.
125 changes: 125 additions & 0 deletions CCJM_2024_JOSS-bib.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% lpcde software article bibliography
%% Authors: Cattaneo-Chandak-Jansson-Ma
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Paper introducing the kernel-based local polynomial smoothing approach
% that the lpcde package implements.
@article{CCJM_2024_Bernoulli,
  author  = {Cattaneo, Matias D. and Chandak, Rajita and Jansson, Michael and Ma, Xinwei},
  title   = {Local Polynomial Conditional Density Estimators},
  journal = {Bernoulli},
  volume  = {30},
  number  = {4},
  pages   = {3193--3223},
  year    = {2024}
}

% Companion arXiv article for the lpcde package.
% Empty volume/number/pages fields are omitted: a preprint has none, and
% empty fields only produce "empty field" warnings.
@article{CCJM_2024_lpcde,
  author  = {Cattaneo, Matias D. and Chandak, Rajita and Jansson, Michael and Ma, Xinwei},
  title   = {{lpcde}: Estimation and Inference for Local Polynomial Conditional Density Estimators},
  journal = {arXiv preprint arXiv:2204.10375},
  year    = {2024},
  url     = {https://arxiv.org/abs/2204.10375}
}

% Cited in the paper for robust bias-correction techniques.
@article{Calonico-Cattaneo-Farrell_2018_JASA,
  author  = {Calonico, Sebastian and Cattaneo, Matias D. and Farrell, Max H.},
  title   = {On the Effect of Bias Estimation on Coverage Accuracy in Nonparametric Inference},
  journal = {Journal of the American Statistical Association},
  volume  = {113},
  number  = {522},
  pages   = {767--779},
  year    = {2018}
}

% Cited in the paper for robust bias-correction techniques.
@article{Calonico-Cattaneo-Farrell_2022_Bernoulli,
  author  = {Calonico, Sebastian and Cattaneo, Matias D. and Farrell, Max H.},
  title   = {Coverage Error Optimal Confidence Intervals for Local Polynomial Regression},
  journal = {Bernoulli},
  volume  = {28},
  number  = {4},
  pages   = {2998--3022},
  year    = {2022}
}

% Journal article on conditional density estimation (Statistica Neerlandica).
@article{DeGooijer-Zerom_2003_SN,
  author  = {De Gooijer, Jan G and Zerom, Dawit},
  title   = {On Conditional Density Estimation},
  journal = {Statistica Neerlandica},
  volume  = {57},
  number  = {2},
  pages   = {159--176},
  year    = {2003}
}

% Textbook cited in the paper as an introduction to local polynomial methods.
@book{Fan-Gijbels_1996_Book,
  author    = {Fan, Jianqing and Gijbels, Irene},
  title     = {Local Polynomial Modelling and Its Applications},
  publisher = {Chapman \& Hall/CRC},
  year      = {1996}
}

% Journal article on conditional density estimation (Biometrika).
@article{Fan-Yao-Tong_1996_Biometrika,
  author  = {Fan, Jianqing and Yao, Qiwei and Tong, Howell},
  title   = {Estimation of Conditional Densities and Sensitivity Measures in Nonlinear Dynamical Systems},
  journal = {Biometrika},
  volume  = {83},
  number  = {1},
  pages   = {189--206},
  year    = {1996}
}

% Journal article on cross-validation for conditional probability densities.
@article{Hall-Racine-Li_2004_JASA,
  author  = {Hall, Peter and Racine, Jeff and Li, Qi},
  title   = {Cross-Validation and the Estimation of Conditional Probability Densities},
  journal = {Journal of the American Statistical Association},
  volume  = {99},
  number  = {468},
  pages   = {1015--1026},
  year    = {2004}
}

% Journal article on estimating conditional distribution functions.
% Initials are written separately ("C. L.") so BibTeX sees two given-name
% tokens; the glued form "CL" is abbreviated to a single "C.".
@article{Hall-Wolff-Yao_1999_JASA,
  author  = {Hall, Peter and Wolff, Rodney C. L. and Yao, Qiwei},
  title   = {Methods for Estimating a Conditional Distribution Function},
  journal = {Journal of the American Statistical Association},
  volume  = {94},
  number  = {445},
  pages   = {154--163},
  year    = {1999}
}

% Textbook cited in the paper as an introduction to kernel-based density estimation.
@book{scott2015multivariate,
  author    = {Scott, David W},
  title     = {Multivariate Density Estimation: Theory, Practice, and Visualization},
  publisher = {John Wiley \& Sons},
  year      = {2015}
}

% Textbook cited in the paper as an introduction to smoothing methods.
% The publisher name takes a plain hyphen; "--" would typeset an en-dash.
@book{simonoff2012smoothing,
  author    = {Simonoff, Jeffrey S},
  title     = {Smoothing Methods in Statistics},
  publisher = {Springer-Verlag},
  year      = {2012}
}

% Textbook cited in the paper as an introduction to kernel smoothing.
% Both names use "Last, First" form with spaced initials so that BibTeX
% keeps every initial when a style abbreviates given names.
@book{Wand-Jones_1995_Book,
  author    = {Wand, M. P. and Jones, M. C.},
  title     = {Kernel Smoothing},
  publisher = {Chapman \& Hall/CRC},
  year      = {1995}
}

% Reference for the R package np, one of the related packages compared in the paper.
% The JSS-only \pkg{} macro is replaced by plain brace protection so the
% title renders without the jss document class.
@article{np,
  author  = {Hayfield, Tristen and Racine, Jeffrey S.},
  title   = {Nonparametric Econometrics: The {np} Package},
  journal = {Journal of Statistical Software},
  volume  = {27},
  number  = {5},
  year    = {2008},
  doi     = {10.18637/jss.v027.i05}
}

% Reference for ggplot2, cited for the package's plotting commands.
% The JSS-only \pkg{} macro is replaced by brace protection, and the
% publisher's city is moved to the address field.
@book{ggplot2,
  author    = {Wickham, Hadley},
  title     = {{ggplot2}: Elegant Graphics for Data Analysis},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {2016},
  isbn      = {978-3-319-24277-4}
}

% Reference for the R package hdrcde, one of the related packages compared in the paper.
% The JSS-only \pkg{} and \proglang{} macros are replaced by brace
% protection so the entry renders without the jss document class.
@manual{hdrcde,
  author = {Hyndman, Rob J. and Einbeck, Jochen and Wand, Matthew P.},
  title  = {{hdrcde}: Highest Density Regions and Conditional Density Estimation},
  year   = {2021},
  note   = {{R} package version 3.4}
}

% Reference cited in the paper for the Python package cde.
% Formatted like the other arXiv preprint in this file: same journal
% wording plus a url to the abstract page.
@article{rothfuss2019conditional,
  author  = {Rothfuss, Jonas and Ferreira, Fabio and Walther, Simon and Ulrich, Maxim},
  title   = {Conditional Density Estimation with Neural Networks: Best Practices and Benchmarks},
  journal = {arXiv preprint arXiv:1903.00954},
  year    = {2019},
  url     = {https://arxiv.org/abs/1903.00954}
}
70 changes: 70 additions & 0 deletions CCJM_2024_JOSS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
---
# Paper metadata (YAML front matter). Nested keys are indented under
# their parent so each author and affiliation parses as a single mapping.
title: 'lpcde: Estimation and Inference for Local Polynomial Conditional Density Estimators'
tags:
  - R
  - statistics
  - density estimation
  - kernel methods
  - local polynomials
date: "21 August 2024"
output:
  html_document:
    df_print: paged
# Each author's `affiliation` refers to an `index` in `affiliations` below.
authors:
  - name: Matias D. Cattaneo
    orcid: "0000-0003-0493-7506"
    affiliation: 1
  - name: Rajita Chandak
    orcid: "0009-0006-4289-2520"
    corresponding: true
    affiliation: 2
  - name: Michael Jansson
    orcid: "0000-0003-4678-7518"
    affiliation: 3
  - name: Xinwei Ma
    orcid: "0000-0001-8827-9146"
    affiliation: 4
bibliography: CCJM_2024_JOSS-bib.bib
link-citations: true
affiliations:
  - name: Department of Operations Research and Financial Engineering, Princeton University, USA
    index: 1
  - name: Institute of Mathematics, EPFL, Switzerland
    index: 2
  - name: Department of Economics, University of California, Berkeley, USA
    index: 3
  - name: Department of Economics, University of California, San Diego, USA
    index: 4
---

# Summary

Conditional cumulative distribution functions (CDFs), conditional probability density functions (PDFs), and derivatives thereof, are important parameters of interest in statistics, econometrics, and other data science disciplines. The package `lpcde` implements new estimation and inference methods for conditional CDFs, conditional PDFs, and derivatives thereof, employing the kernel-based local polynomial smoothing approach introduced in @CCJM_2024_Bernoulli.

The package `lpcde` offers data-driven (pointwise and uniform) estimation and inference methods for conditional CDFs, conditional PDFs, and derivatives thereof, which are automatically valid at both interior and boundary points of the support of the outcome and conditioning variables. For point estimation, the package offers mean squared error optimal bandwidth selection and associated optimal mean square and uniform point estimators. For inference, the package offers valid confidence intervals and confidence bands based on robust bias-correction techniques [@Calonico-Cattaneo-Farrell_2018_JASA; @Calonico-Cattaneo-Farrell_2022_Bernoulli]. Finally, these statistical procedures can be easily used for visualization and graphical presentation of smooth estimates of conditional CDFs, conditional PDFs, and derivatives thereof, with custom `ggplot` [@ggplot2] commands built for the package.

This package is currently the only open source implementation of an estimator offering boundary adaptive, data-driven conditional density estimation with robust bias-corrected pointwise confidence interval and uniform confidence band constructions, providing users with statistical tools to better understand the reliability of their empirical analysis. A detailed tutorial, replication files, and other information on how to use the package can be found in the [GitHub repository](https://github.com/nppackages/lpcde) and through the [CRAN repository](https://cran.r-project.org/web/packages/lpcde/index.html). See also the `lpcde` package website (https://nppackages.github.io/lpcde/) and the companion arXiv article [@CCJM_2024_lpcde] for additional methodological information and numerical results.

# Statement of need

@Wand-Jones_1995_Book, @Fan-Gijbels_1996_Book, @simonoff2012smoothing, and @scott2015multivariate give textbook introductions to kernel-based density and local polynomial estimation and inference methods. The core idea underlying the estimator implemented in `lpcde` is to use kernel-based local polynomial smoothing methods to construct an automatic boundary adaptive estimator for conditional CDFs, conditional PDFs, and derivatives thereof. The estimator implemented in this package consists of two steps. The first step estimates the conditional distribution function using standard local polynomial regression methods, and the second step applies local polynomial smoothing to the (non-smooth) local polynomial conditional CDF estimate from the first step to obtain a smooth estimate of the conditional CDF, conditional PDF, and derivatives thereof.

A distinct advantage of this estimation method over existing ones is its boundary adaptivity for a possibly unknown compact support of the data. Furthermore, the estimator has a simple closed form representation, which leads to easy and fast implementation. Unlike other boundary adaptive procedures, the estimation procedures implemented in the package `lpcde` do not require pre-processing of data, and thus avoid the challenges of hyper-parameter tuning: only one bandwidth parameter needs to be selected for implementation. See @CCJM_2024_Bernoulli and @CCJM_2024_lpcde for more details.

# Comparing and contrasting existing toolsets

The package `lpcde` contributes to a small set of open source statistical software packages implementing estimation and inference methods for conditional CDF, conditional PDF, and derivatives thereof. More specifically, we identified two `R` packages, `hdrcde` [@hdrcde] and `np` [@np], and one `Python` package, `cde` [@rothfuss2019conditional], which provide related methodology. There are no open source `Stata` packages that implement comparable estimation and inference methods. The table below summarizes some of the main differences between those other packages and `lpcde`. Notably, `lpcde` is the only package available that provides both pointwise and uniform uncertainty quantification, in addition to producing boundary adaptive mean square and uniformly optimal point estimates via data-driven, optimal tuning parameter selection. Furthermore, the `lpcde` package produces proper conditional density estimates that are non-negative and integrate to one. These features are unique contributions of the package to the `R` toolkit and, more broadly, to the open source statistical community.

| Package | Programming language | CDF/Derivative estimation | Regularized density | Valid at boundary | Standard error | Valid inference | Confidence bands | Bandwidth selection |
|--------|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|
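| `hdrcde` | R | x | x | x | x | x | x | $\checkmark$ |
| `np` | R | x | x | x | $\checkmark$ | x | x | $\checkmark$ |
| `cde` | Python | x | x | x | x | x | x | $\checkmark$ |
| `lpcde` | R | $\checkmark$ | $\checkmark$ | $\checkmark$ | $\checkmark$ | $\checkmark$ | $\checkmark$ | $\checkmark$ |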

# Acknowledgements

Cattaneo gratefully acknowledges financial support from the National Science Foundation through grants SES-1947805 and DMS-2210561, and from the National Institutes of Health (R01 GM072611-16). Jansson gratefully acknowledges financial support from the National Science Foundation through grant SES-1947662.

# References

0 comments on commit a0c0160

Please sign in to comment.