-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fc0912e
commit a0c0160
Showing
2 changed files
with
195 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%% lpcde software article bibliography | ||
%% Authors: Cattaneo-Chandak-Jansson-Ma | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%% Methodology paper introducing the local polynomial conditional density estimators.
@article{CCJM_2024_Bernoulli,
  author  = {Cattaneo, Matias D. and Chandak, Rajita and Jansson, Michael and Ma, Xinwei},
  title   = {Local Polynomial Conditional Density Estimators},
  journal = {Bernoulli},
  volume  = {30},
  number  = {4},
  pages   = {3193--3223},
  year    = {2024},
}
|
||
%% Companion arXiv article for the lpcde package.
%% The journal field is kept so existing styles render the preprint as before;
%% the arXiv identifier is additionally exposed through eprint/archiveprefix.
@article{CCJM_2024_lpcde,
  author        = {Cattaneo, Matias D. and Chandak, Rajita and Jansson, Michael and Ma, Xinwei},
  title         = {{lpcde}: Estimation and Inference for Local Polynomial Conditional Density Estimators},
  journal       = {arXiv preprint arXiv:2204.10375},
  year          = {2024},
  eprint        = {2204.10375},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2204.10375},
}
|
||
%% Robust bias-correction reference: effect of bias estimation on coverage accuracy.
@article{Calonico-Cattaneo-Farrell_2018_JASA,
  author  = {Calonico, Sebastian and Cattaneo, Matias D. and Farrell, Max H.},
  title   = {On the Effect of Bias Estimation on Coverage Accuracy in Nonparametric Inference},
  journal = {Journal of the American Statistical Association},
  volume  = {113},
  number  = {522},
  pages   = {767--779},
  year    = {2018},
}
|
||
%% Robust bias-correction reference: coverage error optimal confidence intervals.
@article{Calonico-Cattaneo-Farrell_2022_Bernoulli,
  author  = {Calonico, Sebastian and Cattaneo, Matias D. and Farrell, Max H.},
  title   = {Coverage Error Optimal Confidence Intervals for Local Polynomial Regression},
  journal = {Bernoulli},
  volume  = {28},
  number  = {4},
  pages   = {2998--3022},
  year    = {2022},
}
|
||
%% Journal article on conditional density estimation.
@article{DeGooijer-Zerom_2003_SN,
  author  = {De Gooijer, Jan G. and Zerom, Dawit},
  title   = {On Conditional Density Estimation},
  journal = {Statistica Neerlandica},
  volume  = {57},
  number  = {2},
  pages   = {159--176},
  year    = {2003},
}
|
||
%% Textbook on local polynomial modelling.
@book{Fan-Gijbels_1996_Book,
  author    = {Fan, Jianqing and Gijbels, Irene},
  title     = {Local Polynomial Modelling and Its Applications},
  publisher = {Chapman \& Hall/CRC},
  year      = {1996},
}
|
||
%% Journal article on estimating conditional densities in nonlinear dynamical systems.
@article{Fan-Yao-Tong_1996_Biometrika,
  author  = {Fan, Jianqing and Yao, Qiwei and Tong, Howell},
  title   = {Estimation of Conditional Densities and Sensitivity Measures in Nonlinear Dynamical Systems},
  journal = {Biometrika},
  volume  = {83},
  number  = {1},
  pages   = {189--206},
  year    = {1996},
}
|
||
%% Journal article on cross-validation for conditional probability density estimation.
@article{Hall-Racine-Li_2004_JASA,
  author  = {Hall, Peter and Racine, Jeff and Li, Qi},
  title   = {Cross-Validation and the Estimation of Conditional Probability Densities},
  journal = {Journal of the American Statistical Association},
  volume  = {99},
  number  = {468},
  pages   = {1015--1026},
  year    = {2004},
}
|
||
%% Journal article on estimating conditional distribution functions.
@article{Hall-Wolff-Yao_1999_JASA,
  author  = {Hall, Peter and Wolff, Rodney C. L. and Yao, Qiwei},
  title   = {Methods for Estimating a Conditional Distribution Function},
  journal = {Journal of the American Statistical Association},
  volume  = {94},
  number  = {445},
  pages   = {154--163},
  year    = {1999},
}
|
||
%% Textbook on multivariate density estimation.
@book{scott2015multivariate,
  author    = {Scott, David W.},
  title     = {Multivariate Density Estimation: Theory, Practice, and Visualization},
  publisher = {John Wiley \& Sons},
  year      = {2015},
}
|
||
%% Textbook on smoothing methods.
%% The publisher name takes a plain hyphen; `--` would typeset an en dash.
@book{simonoff2012smoothing,
  author    = {Simonoff, Jeffrey S.},
  title     = {Smoothing Methods in Statistics},
  publisher = {Springer-Verlag},
  year      = {2012},
}
|
||
%% Textbook on kernel smoothing.
@book{Wand-Jones_1995_Book,
  author    = {Wand, M. P. and Jones, M. C.},
  title     = {Kernel Smoothing},
  publisher = {Chapman \& Hall/CRC},
  year      = {1995},
}
|
||
%% Software article for the R package np (cited as a related toolset).
%% NOTE(review): \pkg is a Journal of Statistical Software macro; confirm the
%% target toolchain defines it or renders its argument.
@article{np,
  author  = {Hayfield, Tristen and Racine, Jeffrey S.},
  title   = {Nonparametric Econometrics: The \pkg{np} Package},
  journal = {Journal of Statistical Software},
  volume  = {27},
  number  = {5},
  year    = {2008},
  doi     = {10.18637/jss.v027.i05},
}
|
||
%% Book reference for ggplot2 (the package's plotting commands build on it).
%% NOTE(review): \pkg is a Journal of Statistical Software macro; confirm the
%% target toolchain defines it or renders its argument.
@book{ggplot2,
  author    = {Wickham, Hadley},
  title     = {\pkg{ggplot2}: Elegant Graphics for Data Analysis},
  publisher = {Springer-Verlag New York},
  year      = {2016},
  isbn      = {978-3-319-24277-4},
}
|
||
%% Manual for the R package hdrcde (cited as a related toolset).
%% NOTE(review): \pkg and \proglang are Journal of Statistical Software macros;
%% confirm the target toolchain defines them or renders their arguments.
@manual{hdrcde,
  author = {Hyndman, Rob J. and Einbeck, Jochen and Wand, Matthew P.},
  title  = {\pkg{hdrcde}: Highest Density Regions and Conditional Density Estimation},
  year   = {2021},
  note   = {\proglang{R} package version 3.4},
}
|
||
%% arXiv preprint cited for the Python package cde.
%% The journal field is kept so existing styles render the preprint as before;
%% the arXiv identifier is additionally exposed through eprint/archiveprefix.
@article{rothfuss2019conditional,
  author        = {Rothfuss, Jonas and Ferreira, Fabio and Walther, Simon and Ulrich, Maxim},
  title         = {Conditional Density Estimation with Neural Networks: Best Practices and Benchmarks},
  journal       = {arXiv:1903.00954},
  year          = {2019},
  eprint        = {1903.00954},
  archiveprefix = {arXiv},
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
--- | ||
title: 'lpcde: Estimation and Inference for Local Polynomial Conditional Density Estimators' | ||
tags: | ||
- R | ||
- statistics | ||
- density estimation | ||
- kernel methods | ||
- local polynomials | ||
date: "21 August 2024" | ||
output: | ||
html_document: | ||
df_print: paged | ||
authors: | ||
- name: Matias D. Cattaneo | ||
orcid: "0000-0003-0493-7506" | ||
affiliation: 1 | ||
- name: Rajita Chandak | ||
orcid: "0009-0006-4289-2520" | ||
corresponding: true | ||
affiliation: 2 | ||
- name: Michael Jansson | ||
orcid: "0000-0003-4678-7518" | ||
affiliation: 3 | ||
- name: Xinwei Ma | ||
orcid: "0000-0001-8827-9146" | ||
affiliation: 4 | ||
bibliography: CCJM_2024_JOSS-bib.bib | ||
link-citations: true | ||
affiliations: | ||
- name: Department of Operations Research and Financial Engineering, Princeton University, | ||
USA | ||
index: 1 | ||
- name: Institute of Mathematics, EPFL, Switzerland | ||
index: 2 | ||
- name: Department of Economics, University of California, Berkeley, USA | ||
index: 3 | ||
- name: Department of Economics, University of California, San Diego, USA | ||
index: 4 | ||
--- | ||
|
||
# Summary | ||
|
||
Conditional cumulative distribution functions (CDFs), conditional probability density functions (PDFs), and derivatives thereof, are important parameters of interest in statistics, econometrics, and other data science disciplines. The package `lpcde` implements new estimation and inference methods for conditional CDFs, conditional PDFs, and derivatives thereof, employing the kernel-based local polynomial smoothing approach introduced in @CCJM_2024_Bernoulli. | ||
|
||
The package `lpcde` offers data-driven (pointwise and uniform) estimation and inference methods for conditional CDFs, conditional PDFs, and derivatives thereof, which are automatically valid at both interior and boundary points of the support of the outcome and conditioning variables. For point estimation, the package offers mean squared error optimal bandwidth selection and associated optimal mean square and uniform point estimators. For inference, the package offers valid confidence intervals and confidence bands based on robust bias-correction techniques [@Calonico-Cattaneo-Farrell_2018_JASA; @Calonico-Cattaneo-Farrell_2022_Bernoulli]. Finally, these statistical procedures can be easily used for visualization and graphical presentation of smooth estimates of conditional CDFs, conditional PDFs, and derivatives thereof, with custom `ggplot` [@ggplot2] commands built for the package. | ||
|
||
This package is currently the only open source implementation of an estimator offering boundary adaptive, data-driven conditional density estimation with robust bias-corrected pointwise confidence interval and uniform confidence band constructions, providing users with statistical tools to better understand the reliability of their empirical analysis. A detailed tutorial, replication files, and other information on how to use the package can be found in the [GitHub repository](https://github.com/nppackages/lpcde) and through the [CRAN repository](https://cran.r-project.org/web/packages/lpcde/index.html). See also the `lpcde` package website (https://nppackages.github.io/lpcde/) and the companion arXiv article [@CCJM_2024_lpcde] for additional methodological information and numerical results. | ||
|
||
# Statement of need | ||
|
||
@Wand-Jones_1995_Book, @Fan-Gijbels_1996_Book, @simonoff2012smoothing, and @scott2015multivariate give textbook introductions to kernel-based density and local polynomial estimation and inference methods. The core idea underlying the estimator implemented in `lpcde` is to use kernel-based local polynomial smoothing methods to construct an automatic boundary adaptive estimator for conditional CDFs, conditional PDFs, and derivatives thereof. The estimator implemented in this package consists of two steps. The first step estimates the conditional distribution function using standard local polynomial regression methods, and the second step applies local polynomial smoothing to the (non-smooth) local polynomial conditional CDF estimate from the first step to obtain a smooth estimate of the conditional CDF, conditional PDF, and derivatives thereof. | ||
|
||
A distinct advantage of this estimation method over existing ones is its boundary adaptivity for a possibly unknown compact support of the data. Furthermore, the estimator has a simple closed form representation, which leads to easy and fast implementation. Unlike other boundary adaptive procedures, the estimation procedures implemented in the package `lpcde` do not require pre-processing of data, and thus avoid the challenges of hyper-parameter tuning: only one bandwidth parameter needs to be selected for implementation. See @CCJM_2024_Bernoulli and @CCJM_2024_lpcde for more details. | ||
|
||
# Comparing and contrasting existing toolsets | ||
|
||
The package `lpcde` contributes to a small set of open source statistical software packages implementing estimation and inference methods for conditional CDF, conditional PDF, and derivatives thereof. More specifically, we identified two `R` packages, `hdrcde` [@hdrcde] and `np` [@np], and one `Python` package, `cde` [@rothfuss2019conditional], which provide related methodology. There are no open source `Stata` packages that implement comparable estimation and inference methods. The table below summarizes some of the main differences between those other packages and `lpcde`. Notably, `lpcde` is the only package available that provides both pointwise and uniform uncertainty quantification, in addition to producing boundary adaptive mean square and uniformly optimal point estimates via data-driven, optimal tuning parameter selection. Furthermore, the `lpcde` package produces proper conditional density estimates that are non-negative and integrate to one. These features are unique contributions of the package to the `R` toolkit and, more broadly, to the open source statistical community. | ||
|
||
| Package | Programming language | CDF/Derivative estimation | Regularized density | Valid at boundary | Standard error | Valid inference | Confidence bands | Bandwidth selection | | ||
|--------|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:| | ||
| `hdrcde` | R | x | x | x | x | x | x | ✓ | | ||
| `np` | R | x | x | x | ✓ | x | x | ✓ | | ||
| `cde` | Python | x | x | x | x | x | x | ✓ | | ||
| `lpcde` | R | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | ||
|
||
# Acknowledgements | ||
|
||
Cattaneo gratefully acknowledges financial support from the National Science Foundation through grants SES-1947805 and DMS-2210561, and from the National Institutes of Health (R01 GM072611-16). Jansson gratefully acknowledges financial support from the National Science Foundation through grant SES-1947662. | ||
|
||
# References |