Skip to content

Commit

Permalink
add time_invariant_to_panel function
Browse files Browse the repository at this point in the history
  • Loading branch information
bcallaway11 committed Sep 12, 2024
1 parent ab5915f commit 104ec43
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 29 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export(source_all)
export(subsample)
export(t2orig)
export(t2orig_inner)
export(time_invariant_to_panel)
export(toformula)
export(weighted.checkfun)
import(data.table)
Expand Down
27 changes: 15 additions & 12 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,28 @@

* add function `get_principal_components` to get unit-specific principal components of time-varying variables

* add function `time_invariant_to_panel` for repeating time-invariant variables, mainly with the
idea of adding them into a panel data set

# BMisc 1.4.6

* Adds functions `get_group`, `get_YiGmin1`, and `get_Yi1` as utility functions for manipulating data. This is especially useful for settings with staggered treatment adoption.
* Add functions `get_lagYi` and `get_first_difference` as more utility functions for working with panel data.
* Add function `get_Yit` which recovers outcomes

* Add functions `get_lagYi` and `get_first_difference` as more utility functions for working with panel data.

* Add function `get_Yit` which recovers outcomes
in period t for all units in a panel.

* Restore default of `makeBalancedPanel` to return a data.frame rather than a data.table. New function argument `return_data.table` allows for returning a data.table if this is the desired functionality from the user.

* Fixes documentation NOTE in `id2rownum`

# BMisc 1.4.5

* only execute examples for `subsample` and `blockBootSample` if `plm` package is available (thanks Achim Zeileis)

# BMisc 1.4.4

* faster version of `panel2cs2` (thanks Kyle Butts)

# BMisc 1.4.3
Expand All @@ -38,7 +41,7 @@
# BMisc 1.4.2

* changed package maintainer contact information

* added source_all function

# BMisc 1.4.1
Expand All @@ -48,7 +51,7 @@
# BMisc 1.4.0

* removed dependency on plm and formula.tools

* add function blockBootSample for block bootstrapping with panel data

* add option in makeDist to force the values of the distribution function to be between 0 and 1
Expand All @@ -58,13 +61,13 @@
* Update rhs.vars to fix bug related to formulas like y~x+I(x^2)

* Update toformula to allow for no right hand side variables

# BMisc 1.3.0

* Added function `invertEcdf` to take distribution functions (ecdf objects) and turn them into step functions for the quantiles.

* Improved code for working with formulas

# BMisc 1.2.0

* Added function `subsample` for obtaining a subsample of a panel data set
Expand Down
63 changes: 50 additions & 13 deletions R/BMisc.R
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ blockBootSample <- function(data, idname) {
makeDist <- function(x, Fx, sorted = FALSE, rearrange = FALSE, force01 = FALSE, method = "constant") {
if (!sorted) {
tmat <- cbind(x, Fx)
tmat <- tmat[order(x), , drop=FALSE]
tmat <- tmat[order(x), , drop = FALSE]
x <- tmat[, 1]
Fx <- tmat[, 2]
}
Expand Down Expand Up @@ -1090,6 +1090,42 @@ get_first_difference <- function(df, idname, yname, tname) {
df[, yname] - df$.lag
}

#' @title time_invariant_to_panel
#'
#' @description This function takes a time-invariant variable (one value per
#'  unit) and repeats it so that it lines up with the rows of a panel data
#'  set.
#'
#' @param x a vector of length equal to the number of unique ids in df. The
#'  elements of x must be ordered in the same way as
#'  `unique(df[[idname]])`, i.e., by the first appearance of each id in df.
#' @inheritParams get_lagYi
#' @param balanced_panel a logical indicating whether the panel is balanced.
#'  When TRUE, each element of x is simply repeated
#'  `nrow(df) / length(unique(df[[idname]]))` times in a row, which is only
#'  correct if the rows of df are grouped by id (all rows for the first id,
#'  then all rows for the second id, and so on). When FALSE (the default),
#'  values are matched to rows by id, so any row order and any number of
#'  rows per id is supported.
#'
#' @return a vector of length equal to the number of rows in df.
#' @export
time_invariant_to_panel <- function(x, df, idname, balanced_panel = FALSE) {
  if (!(idname %in% names(df))) {
    stop("idname must be the name of a column in df.")
  }

  # Ensure that x has the same length as the number of unique ids
  ids <- df[[idname]]
  unique_ids <- unique(ids)
  if (length(x) != length(unique_ids)) {
    stop("The length of x must be equal to the number of unique ids in df.")
  }

  # Nothing to repeat for an empty data set (also avoids dividing by zero)
  if (length(ids) == 0) {
    return(x[0])
  }

  if (balanced_panel) {
    # Fast path: every id has the same number of rows, and rows are assumed
    # to be grouped by id, so each value is repeated in a contiguous run
    if (length(ids) %% length(unique_ids) != 0) {
      stop("The number of rows in df is not a multiple of the number of unique ids, so the panel cannot be balanced.")
    }
    n_per_id <- length(ids) / length(unique_ids)
    out <- rep(x, each = n_per_id)
  } else {
    # Look up each row's position among the unique ids; using match (rather
    # than indexing a named vector with the raw ids) avoids numeric ids
    # being interpreted as positions
    out <- x[match(ids, unique_ids)]
  }

  return(out)
}

#' Matrix-Vector Multiplication
#'
#' This function multiplies a matrix by a vector and returns a numeric vector.
Expand Down Expand Up @@ -1211,21 +1247,22 @@ drop_collinear <- function(matrix) {
#' @param data a data.frame containing the panel data
#' @param idname the name of the column containing the unit id
#' @param tname the name of the column containing the time period
#' @param n_components the number of principal components to retain, the default is NULL which
#' @param n_components the number of principal components to retain, the default is NULL which
#' will result in all principal components being retained
#' @param ret_wide whether to return the data in wide format (where the number of rows
#' is equal to n = length(unique(data[[idname]])) or long format (where the number
#' of rows is equal to nT = nrow(data)). The default is FALSE, so that long data
#' @param ret_wide whether to return the data in wide format (where the number of rows
#' is equal to n = length(unique(data[[idname]])) or long format (where the number
#' of rows is equal to nT = nrow(data)). The default is FALSE, so that long data
#' is returned by default.
#' @param ret_id whether to return the id column in the output data.frame. The default is FALSE.
#' @return a data.frame containing the original data with the principal components appended
#' @export
get_principal_components <- function(xformula, data, idname, tname,
n_components=NULL, ret_wide=FALSE, ret_id=FALSE) {
get_principal_components <- function(
xformula, data, idname, tname,
n_components = NULL, ret_wide = FALSE, ret_id = FALSE) {
X <- model.matrix(xformula, data)
# drop intercept if it is included
if (all(X[,1] == 1)) {
X <- X[, -1, drop=FALSE]
if (all(X[, 1] == 1)) {
X <- X[, -1, drop = FALSE]
}
nperiods <- length(unique(data[[tname]]))
# handle number of components to return
Expand All @@ -1236,8 +1273,8 @@ get_principal_components <- function(xformula, data, idname, tname,
for (i in 1:ncol(X)) {
this_x_name <- colnames(X)[i]
x <- X[, i]
df <- data.frame(.id=data[[idname]], .time=data[[tname]], x)
wide_data <- df %>% pivot_wider(id_cols = .id, names_from = .time, names_prefix="_x_", values_from = x)
df <- data.frame(.id = data[[idname]], .time = data[[tname]], x)
wide_data <- df %>% pivot_wider(id_cols = .id, names_from = .time, names_prefix = "_x_", values_from = x)
.id <- wide_data$.id
pca_inner <- wide_data %>%
select(starts_with("_x_")) %>%
Expand All @@ -1254,6 +1291,6 @@ get_principal_components <- function(xformula, data, idname, tname,
if (ret_wide) {
return(pc_data)
} else {
return(pc_data[rep(1:nrow(pc_data), each=nperiods), ])
return(pc_data[rep(1:nrow(pc_data), each = nperiods), ])
}
}
}
8 changes: 4 additions & 4 deletions man/get_principal_components.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/time_invariant_to_panel.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 104ec43

Please sign in to comment.