diff --git a/.Rbuildignore b/.Rbuildignore index 5163d0b..d32b58f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,4 @@ ^LICENSE\.md$ +.git/ +.github/ +^\.github$ diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c49182c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +.git/ +.github/ +Dockerfile diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/build-Docker-image-nightly.yml b/.github/workflows/build-Docker-image-nightly.yml new file mode 100644 index 0000000..7ffa64f --- /dev/null +++ b/.github/workflows/build-Docker-image-nightly.yml @@ -0,0 +1,12 @@ +on: + schedule: + - cron: '0 0 * * 1,2,3,4,5' + +jobs: + build_docker_image: + name: "Call build and push action" + uses: ./.github/workflows/build-and-push-Docker-image.yml + secrets: inherit + with: + image-name: workflow.factset + image-tag: nightly diff --git a/.github/workflows/build-Docker-image-on-push-to-main.yml b/.github/workflows/build-Docker-image-on-push-to-main.yml new file mode 100644 index 0000000..b75fca6 --- /dev/null +++ b/.github/workflows/build-Docker-image-on-push-to-main.yml @@ -0,0 +1,12 @@ +on: + push: + branches: [main] + +jobs: + build_docker_image: + name: "Call build and push action" + uses: ./.github/workflows/build-and-push-Docker-image.yml + secrets: inherit + with: + image-name: workflow.factset + image-tag: main diff --git a/.github/workflows/build-Docker-image-on-push-to-pr.yml b/.github/workflows/build-Docker-image-on-push-to-pr.yml new file mode 100644 index 0000000..16934bb --- /dev/null +++ b/.github/workflows/build-Docker-image-on-push-to-pr.yml @@ -0,0 +1,37 @@ +on: + pull_request: + +jobs: + build_docker_image: + name: "Call build and push action" + uses: ./.github/workflows/build-and-push-Docker-image.yml + secrets: inherit + with: + image-name: workflow.factset + image-tag: pr${{ github.event.pull_request.number }} + + add_comment: + needs: build_docker_image + runs-on: ubuntu-latest + steps: + - name: Find Comment + # https://github.com/peter-evans/find-comment + uses: peter-evans/find-comment@v2 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: Docker image from this PR + + - name: Create or update comment + # https://github.com/peter-evans/create-or-update-comment + uses: peter-evans/create-or-update-comment@v3 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + Docker image from this PR (${{ github.event.pull_request.head.sha }}) created + ``` + docker pull ${{ needs.build_docker_image.outputs.full-image-name }} + ``` + edit-mode: replace diff --git a/.github/workflows/build-and-push-Docker-image.yml b/.github/workflows/build-and-push-Docker-image.yml new file mode 100644 index 0000000..b6d8e1e --- /dev/null +++ b/.github/workflows/build-and-push-Docker-image.yml @@ -0,0 +1,67 @@ +--- +name: Build and push docker image + +on: + workflow_call: + inputs: + image-name: + required: true + type: string + image-tag: + required: true + type: string + outputs: + full-image-name: + description: "Full pushed image name including host/registry, name, and tag" + value: ${{ jobs.docker.outputs.full-image-name }} + +jobs: + docker: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + timeout-minutes: 25 + outputs: + full-image-name: ${{ 
steps.image-name.outputs.full-image-name }} + + steps: + + - name: Define image name + id: image-name + run: | + full_image_name="ghcr.io/${{ github.repository_owner }}/${{ inputs.image-name }}:${{ inputs.image-tag }}" + full_image_name=$(echo $full_image_name | tr '[A-Z]' '[a-z]') + echo "full-image-name=$full_image_name" >> "$GITHUB_OUTPUT" + echo "$full_image_name" > full-image-name + + - uses: actions/upload-artifact@v3 + with: + name: full-image-name + path: . + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + tags: ${{ steps.image-name.outputs.full-image-name }} + cache-from: type=gha + cache-to: type=gha,mode=min + no-cache-filters: install-pacta + + check-system-dependencies: + name: "Check System Dependencies" + needs: docker + uses: ./.github/workflows/check-R-sysdeps.yml + with: + image: ${{ needs.docker.outputs.full-image-name }} \ No newline at end of file diff --git a/.github/workflows/check-R-sysdeps.yml b/.github/workflows/check-R-sysdeps.yml new file mode 100644 index 0000000..3a1c08b --- /dev/null +++ b/.github/workflows/check-R-sysdeps.yml @@ -0,0 +1,32 @@ +--- +name: Check R system dependencies + +on: + workflow_call: + inputs: + image: + required: true + type: string + +jobs: + + check-system-dependencies: + runs-on: ubuntu-latest + steps: + - name: 'Pull image' + run: | + echo ${{ inputs.image }} + docker pull ${{ inputs.image }} + - name: 'Run pak::sysreqs_check_installed()' + run: | + + docker run \ + --rm \ + --entrypoint "/bin/sh" \ + ${{ inputs.image }} \ + -c "Rscript -e ' + x <- pak::sysreqs_check_installed() + print(x) + is_installed <- as.data.frame(x)[[\"installed\"]] + stopifnot(all(is_installed)) + '" diff --git a/.github/workflows/lint-package.yaml b/.github/workflows/lint-package.yaml new file mode 100644 index 0000000..f4c4ef2 --- /dev/null +++ b/.github/workflows/lint-package.yaml @@ -0,0 +1,32 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: lint + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. 
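+          # note: `needs: lint` additionally installs any packages listed
+          # under a `Config/Needs/lint` field in DESCRIPTION, if present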
+          needs: lint
+
+      - name: Lint
+        run: lintr::lint_package()
+        shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
diff --git a/.github/workflows/run-hadolint.yml b/.github/workflows/run-hadolint.yml
new file mode 100644
index 0000000..0f07812
--- /dev/null
+++ b/.github/workflows/run-hadolint.yml
@@ -0,0 +1,11 @@
+---
+on: [push, pull_request]
+
+jobs:
+  hadolint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: hadolint/hadolint-action@v3.1.0
+        with:
+          dockerfile: Dockerfile
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e88cb47
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.env
+azure-deploy.parameters.json
diff --git a/DESCRIPTION b/DESCRIPTION
index 03ec3c5..4a5c654 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -30,3 +30,14 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
+Imports:
+    DBI,
+    dbplyr,
+    dplyr,
+    logger,
+    lubridate,
+    rlang,
+    RPostgres,
+    withr
+Suggests:
+    rstudioapi
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1d8a5d2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,69 @@
+# using rocker r-ver as a base with R 4.3.1
+# https://hub.docker.com/r/rocker/r-ver
+# https://rocker-project.org/images/versioned/r-ver.html
+#
+# sets CRAN repo to use Posit Package Manager to freeze R package versions to
+# those available on 2023-10-30
+# https://packagemanager.posit.co/client/#/repos/2/overview
+# https://packagemanager.posit.co/cran/__linux__/jammy/2023-10-30
+
+# set proper base image
+ARG R_VERS="4.3.1"
+FROM rocker/r-ver:$R_VERS AS base
+
+# set Docker image labels
+LABEL org.opencontainers.image.source=https://github.com/RMI-PACTA/workflow.factset
+LABEL org.opencontainers.image.description="Extract FactSet Data for use in PACTA"
+LABEL org.opencontainers.image.licenses=MIT
+LABEL org.opencontainers.image.title=""
+LABEL org.opencontainers.image.revision=""
+LABEL org.opencontainers.image.version=""
+LABEL org.opencontainers.image.vendor=""
+LABEL org.opencontainers.image.base.name=""
+LABEL org.opencontainers.image.ref.name=""
+LABEL org.opencontainers.image.authors=""
+
+# set apt-get to noninteractive mode
+ARG DEBIAN_FRONTEND="noninteractive"
+ARG DEBCONF_NOWARNINGS="yes"
+
+RUN groupadd -r runner-workflow-factset \
+    && useradd -r -g runner-workflow-factset runner-workflow-factset \
+    && mkdir -p /home/runner-workflow-factset \
+    && chown -R runner-workflow-factset /home/runner-workflow-factset
+WORKDIR /home/runner-workflow-factset
+
+# install system dependencies
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        libicu-dev=70.* \
+        libpq-dev=14.* \
+    && chmod -R a+rwX /root \
+    && rm -rf /var/lib/apt/lists/*
+
+# set frozen CRAN repo
+ARG CRAN_REPO="https://packagemanager.posit.co/cran/__linux__/jammy/2023-10-30"
+RUN echo "options(repos = c(CRAN = '$CRAN_REPO'), pkg.sysreqs = FALSE)" >> "${R_HOME}/etc/Rprofile.site" \
+    # install packages for dependency resolution and installation
+    && Rscript -e "install.packages(c('pak', 'jsonlite'))"
+
+# Install R dependencies
+COPY DESCRIPTION /workflow.factset/DESCRIPTION
+
+# install R package dependencies
+RUN Rscript -e "\
+    deps <- pak::local_install_deps(root = '/workflow.factset'); \
+    "
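+
+# NOTE: copying DESCRIPTION and installing dependencies before copying the
+# rest of the repo lets Docker cache the (slow) dependency-install layer.
+# A local build of this image would look like (tag name is illustrative):
+#   docker build -t workflow.factset:local .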
+
+# copy in everything from this repo
+COPY . /workflow.factset
+
+# install the R package itself
+RUN Rscript -e "\
+    pak::pkg_install('local::/workflow.factset'); \
+    "
+
+USER runner-workflow-factset
+
+# set default run behavior
+CMD ["Rscript", "-e", "logger::log_threshold(Sys.getenv('LOG_LEVEL', 'INFO'));workflow.factset::export_pacta_files()"]
diff --git a/NAMESPACE b/NAMESPACE
index 6ae9268..6b2e361 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,13 @@
 # Generated by roxygen2: do not edit by hand
+export(connect_factset_db)
+export(export_pacta_files)
+export(get_entity_financing_data)
+export(get_entity_info)
+export(get_financial_data)
+export(get_fund_data)
+export(get_isin_to_fund_table)
+export(get_iss_emissions_data)
+importFrom(dplyr,"%>%")
+importFrom(rlang,.data)
+importFrom(rlang,.env)
diff --git a/R/connect_factset_db.R b/R/connect_factset_db.R
new file mode 100644
index 0000000..af938cd
--- /dev/null
+++ b/R/connect_factset_db.R
@@ -0,0 +1,107 @@
+#' Connect to the FactSet database
+#'
+#' @param dbname name of the database to connect to
+#' @param host hostname of the server to connect to
+#' @param port port number of the server to connect to
+#' @param options additional options to pass to the database connection.
+#' Typically used to define schema search path.
+#' @param username username to use for the database connection
+#' @param password password to use for the database connection
+#'
+#' @return a database connection object
+#'
+#' @export
+
+connect_factset_db <- function(
+  dbname = Sys.getenv("PGDATABASE"),
+  host = Sys.getenv("PGHOST"),
+  port = Sys.getenv("PGPORT", 5432L),
+  options = "-c search_path=fds",
+  username = Sys.getenv("PGUSER"),
+  password = Sys.getenv("PGPASSWORD")
+) {
+
+  if (username == "") {
+    logger::log_error(
+      "No database username could be found. ",
+      "Please set the username as an environment variable"
+    )
+    stop("No database username could be found.")
+  }
+
+  if (password == "") {
+    logger::log_error(
+      "No database password could be found. ",
+      "Please set the password as an environment variable"
+    )
+    stop("No database password could be found.")
+  }
+
+  logger::log_trace(
+    "Connecting to database {dbname} on {host}:{port} as {username}"
+  )
+  conn <-
+    DBI::dbConnect(
+      drv = RPostgres::Postgres(),
+      dbname = dbname,
+      host = host,
+      port = port,
+      user = username,
+      password = password,
+      options = options
+    )
+
+  reg_conn_finalizer(conn, DBI::dbDisconnect, parent.frame())
+}
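+
+# Example (illustrative): a connection opened inside a function is closed
+# automatically, with a warning, when that function's frame exits:
+#
+#   get_table_names <- function() {
+#     conn <- connect_factset_db()
+#     DBI::dbListTables(conn)
+#   }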
+
+# connection finalizer to ensure connection is closed --------------------------
+# adapted from: https://shrektan.com/post/2019/07/26/create-a-database-connection-that-can-be-disconnected-automatically/ #nolint
+
+reg_conn_finalizer <- function(
+  conn,
+  close_fun,
+  envir
+) {
+  is_parent_global <- identical(.GlobalEnv, envir)
+
+  if (isTRUE(is_parent_global)) {
+    env_finalizer <- new.env(parent = emptyenv())
+    env_finalizer$conn <- conn
+    attr(conn, "env_finalizer") <- env_finalizer
+
+    reg.finalizer(env_finalizer, function(e) {
+      if (DBI::dbIsValid(e$conn)) {
+        warn_db_autoclose(e$conn)
+        try(close_fun(e$conn))
+      }
+    },
+    onexit = TRUE
+    )
+  } else {
+    withr::defer(
+      {
+        if (DBI::dbIsValid(conn)) {
+          warn_db_autoclose(conn)
+          try(close_fun(conn))
+        }
+      },
+      envir = envir,
+      priority = "last"
+    )
+  }
+
+  logger::log_trace("Database connection registered for finalization")
+  return(conn)
+}
+
+warn_db_autoclose <- function(conn) {
+  dbname <- DBI::dbGetInfo(conn)$dbname
+  host <- DBI::dbGetInfo(conn)$host
+  logger::log_warn(
+    "The database connection to ",
+    dbname,
+    " on ",
+    host,
+    " was closed automatically ",
+    "because the calling environment was closed."
+  )
+}
diff --git a/R/export_pacta_files.R b/R/export_pacta_files.R
new file mode 100644
index 0000000..c11eb0c
--- /dev/null
+++ b/R/export_pacta_files.R
@@ -0,0 +1,162 @@
+#' Export files for use in PACTA data preparation
+#'
+#' @param conn database connection object, such as returned by
+#' [connect_factset_db()]
+#' @param destination path to directory where exported files will be saved
+#' @param data_timestamp filter data as-of this timestamp
+#' @param terminate_connection if `TRUE`, disconnect `conn` before returning.
+#' Defaults to `TRUE` when the connection was created by this function.
+#'
+#' @return vector of paths to exported files
+#'
+#' @export
+
+export_pacta_files <- function(
+  conn = connect_factset_db(),
+  destination = file.path(Sys.getenv("EXPORT_DESTINATION")),
+  data_timestamp = Sys.getenv("DATA_TIMESTAMP", Sys.time()),
+  terminate_connection = (
+    # Terminate connection if it was created by this function.
+    deparse(substitute(conn)) == formals(export_pacta_files)[["conn"]]
+  )
+) {
+
+  # Prepare output directories
+
+  if (!dir.exists(destination)) {
+    logger::log_error(
+      "The destination directory {destination} does not exist."
+    )
+    stop("Destination directory does not exist.")
+  }
+
+  if (Sys.getenv("DEPLOY_START_TIME") == "") {
+    logger::log_warn(
+      "The environment variable DEPLOY_START_TIME is not set. ",
+      "Using current system time as start time."
+    )
+  }
+
+  start_time_chr <- Sys.getenv(
+    "DEPLOY_START_TIME",
+    format(Sys.time(), format = "%Y%m%dT%H%M%S", tz = "UTC")
+  )
+
+  if (inherits(data_timestamp, "character")) {
+    data_timestamp <- lubridate::ymd_hms(
+      data_timestamp,
+      quiet = TRUE,
+      tz = "UTC",
+      truncated = 3
+    )
+  }
+
+  if (inherits(data_timestamp, "POSIXct")) {
+    data_timestamp_chr <- format(
+      data_timestamp,
+      format = "%Y%m%dT%H%M%S",
+      tz = "UTC"
+    )
+  } else {
+    logger::log_error(
+      "The data_timestamp argument must be a POSIXct object ",
+      "or a character string coercible to POSIXct format",
+      " (using lubridate::ymd_hms(truncated = 3))."
+    )
+    stop("Invalid data_timestamp argument.")
+  }
+
+  export_dir <- file.path(
+    destination,
+    paste0(data_timestamp_chr, "_pulled", start_time_chr)
+  )
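+  # e.g. <destination>/20231231T000000_pulled20240101T120000 (illustrative)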
+
+  if (!dir.exists(export_dir)) {
+    dir.create(export_dir, recursive = TRUE)
+  }
+
+  # Start Extracting Data
+
+  financial_data_path <- file.path(
+    export_dir,
+    "factset_financial_data.rds"
+  )
+  logger::log_info("Fetching financial data.")
+  financial_data <- get_financial_data(
+    conn = conn,
+    data_timestamp = data_timestamp
+  )
+  logger::log_info("Exporting financial data to {financial_data_path}")
+  saveRDS(object = financial_data, file = financial_data_path)
+
+  entity_info_path <- file.path(export_dir, "factset_entity_info.rds")
+  logger::log_info("Fetching entity info data.")
+  entity_info <- get_entity_info(conn = conn)
+  logger::log_info("Exporting entity info data to {entity_info_path}")
+  saveRDS(object = entity_info, file = entity_info_path)
+
+  entity_financing_data_path <- file.path(
+    export_dir,
+    "factset_entity_financing_data.rds"
+  )
+  logger::log_info("Fetching entity financing data.")
+  entity_financing_data <- get_entity_financing_data(
+    conn = conn,
+    data_timestamp = data_timestamp
+  )
+  logger::log_info(
+    "Exporting entity financing data to {entity_financing_data_path}"
+  )
+  saveRDS(
+    object = entity_financing_data,
+    file = entity_financing_data_path
+  )
+
+  fund_data_path <- file.path(export_dir, "factset_fund_data.rds")
+  logger::log_info("Fetching fund data.")
+  fund_data <- get_fund_data(conn = conn, data_timestamp = data_timestamp)
+  logger::log_info("Exporting fund data to {fund_data_path}")
+  saveRDS(object = fund_data, file = fund_data_path)
+
+  isin_to_fund_table_path <- file.path(
+    export_dir,
+    "factset_isin_to_fund_table.rds"
+  )
+  logger::log_info("Fetching ISIN to fund table.")
+  isin_to_fund_table <- get_isin_to_fund_table(conn = conn)
+  logger::log_info(
+    "Exporting ISIN to fund table to {isin_to_fund_table_path}"
+  )
+  saveRDS(object = isin_to_fund_table, file = isin_to_fund_table_path)
+
+  iss_emissions_path <- file.path(
+    export_dir,
+    "factset_iss_emissions.rds"
+  )
+  logger::log_info("Fetching ISS emissions data.")
+  iss_emissions <- get_iss_emissions_data(
+    conn = conn,
+    reporting_year = lubridate::year(data_timestamp)
+  )
+  logger::log_info(
+    "Exporting ISS emissions data to {iss_emissions_path}"
+  )
+  saveRDS(object = iss_emissions, file = iss_emissions_path)
+
+
+  logger::log_info("Done with data export.")
+
+  # Terminate connection if needed
+  if (terminate_connection) {
+    logger::log_info("Terminating database connection.")
+    DBI::dbDisconnect(conn)
+  }
+
+  return(
+    invisible(
+      c(
+        financial_data_path = financial_data_path,
+        entity_info_path = entity_info_path,
+        entity_financing_data_path = entity_financing_data_path,
+        fund_data_path = fund_data_path,
+        isin_to_fund_table_path = isin_to_fund_table_path,
+        iss_emissions_path = iss_emissions_path
+      )
+    )
+  )
+}
diff --git a/R/get_entity_financing_data.R b/R/get_entity_financing_data.R
new file mode 100644
index 0000000..2afe796
--- /dev/null
+++ b/R/get_entity_financing_data.R
@@ -0,0 +1,96 @@
+#' Get the entity financing data from the FactSet database and prepare the
+#' `factset_entity_financing_data` tibble
+#'
+#' @param conn database connection
+#' @param data_timestamp A single string specifying the desired date for the
+#' data in the form "2021-12-31"
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_entity_financing_data.rds` output file
+#'
+#' @export
+
+get_entity_financing_data <- function(
+  conn,
+  data_timestamp
+) {
+  # get fsym_id to fundamentals fsym_company_id --------------------------------
+
+  logger::log_debug("Extracting entity financing info from database.")
+  logger::log_debug("using data timestamp: ", data_timestamp)
+
+  logger::log_trace("Accessing security map - FactSet Fundamentals.")
+  ff_fsym_company_id <- dplyr::tbl(conn, "ff_v3_ff_sec_map")
+
+  logger::log_trace("Accessing security map - FactSet Ownership.")
+  own_fsym_company_id <- dplyr::tbl(conn, "own_v5_own_sec_map")
+
+  logger::log_trace("UNIONing security maps.")
+  fsym_company_id <- dplyr::union_all(
+    ff_fsym_company_id,
+    own_fsym_company_id
+  )
+
+
+  # get fsym_id to factset_entity_id -------------------------------------------
+
+  logger::log_trace("Accessing security to entity map - FactSet Fundamentals.")
+  ff_sec_entity <- dplyr::tbl(conn, "ff_v3_ff_sec_entity")
+
+  logger::log_trace("Accessing security to entity map - FactSet Ownership.")
+  own_sec_entity <- dplyr::tbl(conn, "own_v5_own_sec_entity")
+
+  logger::log_trace("UNIONing security to entity maps.")
+  sec_entity <- dplyr::union_all(
+    ff_sec_entity,
+    own_sec_entity
+  )
+
+
+  # get market value data ------------------------------------------------------
+
+  logger::log_trace("Accessing market value data.")
+  ff_mkt_val <- dplyr::tbl(conn, "ff_v3_ff_basic_der_af") %>%
+    dplyr::select("fsym_id", "date", "currency", "ff_mkt_val")
+
+
+  # get debt outstanding data --------------------------------------------------
+
+  logger::log_trace("Accessing balance sheet data.")
+  ff_debt <- dplyr::tbl(conn, "ff_v3_ff_basic_af") %>%
+    dplyr::select("fsym_id", "date", "currency", "ff_debt")
+
+
+  # merge and collect the data -------------------------------------------------
+
+  logger::log_trace("Merging entity financing data.")
+  entity_financing_data <- ff_mkt_val %>%
+    dplyr::full_join(
+      ff_debt,
+      by = c("fsym_id", "date", "currency")
+    ) %>%
+    dplyr::left_join(fsym_company_id, by = "fsym_id") %>%
+    dplyr::inner_join(sec_entity, by = c("fsym_company_id" = "fsym_id")) %>%
+    dplyr::filter(!(is.na(.data$ff_mkt_val) & is.na(.data$ff_debt))) %>%
+    dplyr::group_by(.data$fsym_id, .data$currency) %>%
+    dplyr::filter(.data$date <= .env$data_timestamp) %>%
+    dplyr::filter(
+      lubridate::year(.data$date) == lubridate::year(.env$data_timestamp)
+    ) %>%
+    dplyr::filter(.data$date == max(.data$date)) %>%
+    dplyr::ungroup()
+
+  logger::log_trace("Downloading entity financing data.")
+  entity_financing_data <- entity_financing_data %>%
+    dplyr::collect() %>%
+    dplyr::mutate(
+      # convert units from millions to units
+      ff_mkt_val = .data$ff_mkt_val * 1e6,
+      ff_debt = .data$ff_debt * 1e6
+    ) %>%
+    dplyr::distinct()
+
+  # return the entity financing data -------------------------------------------
+
+  entity_financing_data
+}
diff --git a/R/get_entity_info.R b/R/get_entity_info.R
new file mode 100644
index 0000000..a5b3cd3
--- /dev/null
+++ b/R/get_entity_info.R
@@ -0,0 +1,147 @@
+#' Get the entity info data from the FactSet database and prepare the
+#' `factset_entity_info` tibble
+#'
+#' @param conn database connection
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_entity_info.rds` output file
+#'
+#' @export
+
+get_entity_info <-
+  function(conn) {
+    # build connection to database ---------------------------------------------
+
+    logger::log_debug("Extracting entity info from database.")
+
+    # company_name -------------------------------------------------------------
+
+    logger::log_trace("Accessing entity proper names.")
+    entity_proper_name <-
"sym_v1_sym_entity") %>% + dplyr::select("factset_entity_id", "entity_proper_name") + + + # country_of_domicile ------------------------------------------------------ + + logger::log_trace("Accessing entity country of domicile.") + iso_country <- + dplyr::tbl(conn, "sym_v1_sym_entity") %>% + dplyr::select("factset_entity_id", "iso_country") + + + # sector ------------------------------------------------------------------- + + logger::log_trace("Accessing entity sector.") + sector_code <- + dplyr::tbl(conn, "sym_v1_sym_entity_sector") %>% + dplyr::select("factset_entity_id", "sector_code") + + logger::log_trace("Accessing sector descriptions.") + sector_code__sector_desc <- + dplyr::tbl(conn, "ref_v2_factset_sector_map") %>% + dplyr::select(.data$factset_sector_code, .data$factset_sector_desc) + + logger::log_trace("Merging sector codes and sector descriptions.") + factset_sector_desc <- + sector_code %>% + dplyr::left_join( + sector_code__sector_desc, + by = c("sector_code" = "factset_sector_code") + ) %>% + dplyr::select("factset_entity_id", "sector_code", "factset_sector_desc") + + + # sub-sector/industry ------------------------------------------------------ + + logger::log_trace("Accessing entity industry codes.") + industry_code <- + dplyr::tbl(conn, "sym_v1_sym_entity_sector") %>% + dplyr::select("factset_entity_id", "industry_code") + + logger::log_trace("Accessing industry descriptions") + industry_code__industry_desc <- + dplyr::tbl(conn, "ref_v2_factset_industry_map") %>% + dplyr::select("factset_industry_code", "factset_industry_desc") + + logger::log_trace("Merging industry codes and industry descriptions.") + factset_industry_desc <- + industry_code %>% + dplyr::left_join( + industry_code__industry_desc, + by = c("industry_code" = "factset_industry_code") + ) %>% + dplyr::select( + "factset_entity_id", + "industry_code", + "factset_industry_desc" + ) + + + # credit risk parent ------------------------------------------------------- + + logger::log_trace("Accessing entity affiliates.") + ent_v1_ent_entity_affiliates <- dplyr::tbl( + conn, + "ent_v1_ent_entity_affiliates" + ) + + logger::log_trace("Accessing affiliate type map.") + ref_v2_affiliate_type_map <- dplyr::tbl( + conn, + "ref_v2_affiliate_type_map" + ) + + logger::log_trace("Determining last update time for entity affiliates.") + affiliates_last_update <- + dplyr::tbl(conn, "fds_fds_file_history") %>% + dplyr::filter(.data$table_name == "ent_entity_affiliates") %>% + dplyr::filter( + .data$begin_time == max(.data$begin_time, na.rm = TRUE) + ) %>% + # pull also handles `collect`ing the data + dplyr::pull("begin_time") + + logger::log_trace("Determining credit risk parent via entity affiliates.") + credit_parent_id <- + ent_v1_ent_entity_affiliates %>% + dplyr::left_join(ref_v2_affiliate_type_map, by = "aff_type_code") %>% + dplyr::filter(.data$aff_type_desc == "Credit Risk Parent") %>% + dplyr::select( + factset_entity_id = "factset_affiliated_entity_id", + credit_parent_id = "factset_entity_id" + ) %>% + dplyr::mutate( + ent_entity_affiliates_last_update = affiliates_last_update + ) + + + # merge and collect -------------------------------------------------------- + + logger::log_trace("Merging entity info.") + entity_info <- + entity_proper_name %>% + dplyr::left_join( + iso_country, + by = "factset_entity_id" + ) %>% + dplyr::left_join( + factset_sector_desc, + by = "factset_entity_id" + ) %>% + dplyr::left_join( + factset_industry_desc, + by = "factset_entity_id" + ) %>% + dplyr::left_join( + credit_parent_id, + by 
= "factset_entity_id" + ) + + logger::log_trace("Downloading merged entity info from database.") + entity_info <- dplyr::collect(entity_info) + logger::log_trace("Download complete.") + + # return prepared data ----------------------------------------------------- + return(entity_info) + } diff --git a/R/get_financial_data.R b/R/get_financial_data.R new file mode 100644 index 0000000..c6b3555 --- /dev/null +++ b/R/get_financial_data.R @@ -0,0 +1,92 @@ +#' Get the factset financial data from the FactSet database and prepare the +#' `factset_financial_data` tibble +#' +#' @param conn databse connection +#' @param data_timestamp A single string specifying the desired date for the +#' data in the form "2021-12-31" +#' +#' @return A tibble properly prepared to be saved as the +#' `factset_financial_data.rds` output file +#' +#' @export + +get_financial_data <- + function(conn, data_timestamp, ...) { + # build connection to database --------------------------------------------- + + logger::log_debug("Extracting financial info from database.") + logger::log_info("using data timestamp: ", data_timestamp) + + + # factset_entity_id ----------------------------------------------- + + logger::log_trace("Accessing entity id.") + factset_entity_id <- + dplyr::tbl(conn, "own_v5_own_sec_entity") %>% + dplyr::select("fsym_id", "factset_entity_id") + + + # isin --------------------------------------------------------------------- + + logger::log_trace("Accessing ISINs.") + isin <- dplyr::tbl(conn, "sym_v1_sym_isin") + + + # adj_price ---------------------------------------------------------------- + + logger::log_trace( + "Accessing share prices. ", + "Filtering to date: {data_timestamp}" + ) + adj_price <- + dplyr::tbl(conn, "own_v5_own_sec_prices") %>% + dplyr::filter(.data$price_date == .env$data_timestamp) %>% + dplyr::select("fsym_id", "adj_price") + + + # adj_shares_outstanding --------------------------------------------------- + + logger::log_trace( + "Accessing shares outstanding. 
", + "Filtering to date: {data_timestamp}" + ) + adj_shares_outstanding <- + dplyr::tbl(conn, "own_v5_own_sec_prices") %>% + dplyr::filter(.data$price_date == .env$data_timestamp) %>% + dplyr::select("fsym_id", "adj_shares_outstanding") + + + # issue_type --------------------------------------------------------------- + + logger::log_trace("Accessing issue type.") + issue_type <- + dplyr::tbl(conn, "own_v5_own_sec_coverage") %>% + dplyr::select("fsym_id", "issue_type") + + + # one_adr_eq --------------------------------------------------------------- + + logger::log_trace("Accessing ADR equivilents.") + one_adr_eq <- + dplyr::tbl(conn, "own_v5_own_sec_adr_ord_ratio") %>% + dplyr::select("fsym_id" = "adr_fsym_id", "one_adr_eq") + + + # merge and collect -------------------------------------------------------- + + logger::log_trace("Merging financial info.") + fin_data <- + isin %>% + dplyr::left_join(factset_entity_id, by = "fsym_id") %>% + dplyr::left_join(adj_price, by = "fsym_id") %>% + dplyr::left_join(adj_shares_outstanding, by = "fsym_id") %>% + dplyr::left_join(issue_type, by = "fsym_id") %>% + dplyr::left_join(one_adr_eq, by = "fsym_id") + + logger::log_trace("Downloading merged financial info from database.") + fin_data <- dplyr::collect(fin_data) + logger::log_trace("Download complete.") + + # return prepared data ----------------------------------------------------- + return(fin_data) + } diff --git a/R/get_fund_data.R b/R/get_fund_data.R new file mode 100644 index 0000000..9a555ab --- /dev/null +++ b/R/get_fund_data.R @@ -0,0 +1,98 @@ +#' Get the fund data from the FactSet database and prepare the +#' `factset_fund_data` tibble +#' +#' @param conn databse connection +#' @param data_timestamp A single string specifying the desired date for the +#' data in the form "2021-12-31" +#' +#' @return A tibble properly prepared to be saved as the `factset_fund_data.rds` +#' output file +#' +#' @export + +get_fund_data <- function(conn, data_timestamp) { + # get the fund holdings and the holdings' reported market value ------------ + + logger::log_debug("Extracting financial info from database.") + logger::log_info("using data timestamp: ", data_timestamp) + + logger::log_trace( + "Accessing historical fund holdings - security level. ", + "Filtering to date: {data_timestamp}" + ) + fund_security <- + dplyr::tbl(conn, "own_v5_own_fund_detail") %>% + dplyr::filter(.data$report_date == .env$data_timestamp) %>% + dplyr::select( + factset_fund_id = "factset_fund_id", + holding_fsym_id = "fsym_id", + holding_reported_mv = "reported_mv" + ) + + logger::log_trace( + "Accessing historical fund holdings - non-securities. ", + "Filtering to date: {data_timestamp}" + ) + fund_nonsecurity <- + dplyr::tbl(conn, "own_v5_own_fund_generic") %>% + dplyr::filter(.data$report_date == .env$data_timestamp) %>% + dplyr::select( + factset_fund_id = "factset_fund_id", + holding_fsym_id = "generic_id", + holding_reported_mv = "reported_mv" + ) + + logger::log_trace( + "Combining historical fund holdings - security and non-security." 
+  fund_holding <-
+    dplyr::union_all(
+      fund_security,
+      fund_nonsecurity
+    )
+
+
+  # get the fund total reported market value ---------------------------------
+
+  logger::log_trace(
+    "Accessing historical fund filings. ",
+    "Filtering to date: {data_timestamp}"
+  )
+  fund_mv <-
+    dplyr::tbl(conn, "own_v5_own_ent_fund_filing_hist") %>%
+    dplyr::filter(.data$report_date == .env$data_timestamp) %>%
+    dplyr::select("factset_fund_id", "total_reported_mv")
+
+
+  logger::log_trace("Accessing current ISIN mappings.")
+  # symbology containing the ISIN to fsym_id link
+  fsym_id__isin <-
+    dplyr::tbl(conn, "sym_v1_sym_isin")
+
+
+  # merge and collect the data ------------------------------------------------
+
+  logger::log_trace("Merging the data.")
+  fund_data <-
+    fund_mv %>%
+    dplyr::filter(
+      .data$total_reported_mv != 0 & !is.na(.data$total_reported_mv)
+    ) %>%
+    dplyr::left_join(fund_holding, by = "factset_fund_id") %>%
+    dplyr::left_join(fsym_id__isin, by = c(`holding_fsym_id` = "fsym_id")) %>%
+    dplyr::select(
+      factset_fund_id = "factset_fund_id",
+      fund_reported_mv = "total_reported_mv",
+      holding_isin = "isin",
+      holding_reported_mv = "holding_reported_mv"
+    )
+
+  logger::log_trace("Downloading fund data.")
+  fund_data <- dplyr::collect(fund_data)
+
+  # return the fund data ------------------------------------------------------
+
+  return(fund_data)
+}
diff --git a/R/get_isin_to_fund_table.R b/R/get_isin_to_fund_table.R
new file mode 100644
index 0000000..bf76142
--- /dev/null
+++ b/R/get_isin_to_fund_table.R
@@ -0,0 +1,40 @@
+#' Get the isin_to_fund_table data from the FactSet database and prepare the
+#' `factset_isin_to_fund_table` tibble
+#'
+#' @param conn database connection
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_isin_to_fund_table.rds` output file
+#'
+#' @export
+
+get_isin_to_fund_table <- function(conn) {
+  # get the ISIN to fsym_id table --------------------------------------------
+
+  logger::log_info("Getting ISIN to fsym_id mapping")
+  isin <-
+    dplyr::tbl(conn, "sym_v1_sym_isin") %>%
+    dplyr::select("isin", "fsym_id")
+
+
+  # get the fsym_id to fund_id table -----------------------------------------
+
+  logger::log_info("Getting fsym_id to fund id mapping")
+  fund_id <-
+    dplyr::tbl(conn, "own_v5_own_ent_fund_identifiers") %>%
+    dplyr::filter(.data$identifier_type == "FSYM_ID") %>%
+    dplyr::select(fsym_id = "fund_identifier", "factset_fund_id")
+
+
+  # merge and collect the data ------------------------------
+
+  logger::log_info("Merging ISIN to fsym_id and fsym_id to fund_id")
+  isin__factset_fund_id <-
+    fund_id %>%
+    dplyr::inner_join(isin, by = "fsym_id") %>%
+    dplyr::select("isin", "fsym_id", "factset_fund_id") %>%
+    dplyr::collect()
+
+  # return the ISIN to fund_id table -----------------------------------------
+  return(isin__factset_fund_id)
+}
diff --git a/R/get_iss_emissions_data.R b/R/get_iss_emissions_data.R
new file mode 100644
index 0000000..17dd790
--- /dev/null
+++ b/R/get_iss_emissions_data.R
@@ -0,0 +1,108 @@
+#' Get the ISS emissions data from the FactSet database and prepare the
+#' `factset_iss_emissions` tibble
+#'
+#' @param conn database connection
+#' @param reporting_year A single numeric specifying the year of data to be
+#' returned
+#' @param min_estimated_trust A single numeric specifying the minimum allowed
+#' "estimated trust" value
+#' @param min_reported_trust A single numeric specifying the minimum allowed
+#' "reported trust" value
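+#'
+#' @examples
+#' \dontrun{
+#' # illustrative only: assumes the PG* environment variables point at a
+#' # FactSet-sourced Postgres database (see `connect_factset_db()`)
+#' conn <- connect_factset_db()
+#' emissions <- get_iss_emissions_data(conn, reporting_year = 2022)
+#' DBI::dbDisconnect(conn)
+#' }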
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_iss_emissions.rds` output file
+#'
+#' @export
+
+get_iss_emissions_data <- function(
+  conn,
+  reporting_year,
+  min_estimated_trust = 0.0,
+  min_reported_trust = 0.0
+) {
+  # convert `reporting_year` to date ------------------------------------------
+  sql_filter_date <- as.Date(paste0(reporting_year, "-01-01"), "%Y-%m-%d")
+
+  # get the relevant fsym_id to factset_entity_id table ----------------------
+  fsym_id__factset_entity_id <-
+    dplyr::tbl(conn, "icc_v2_icc_sec_entity_hist") %>%
+    # end_date identifies the date the identifier was last associated with
+    # fsym_id, i.e. if there is no end_date (end_date == NA) then the
+    # association is still valid
+    dplyr::filter(
+      .data$end_date >= sql_filter_date | is.na(.data$end_date)
+    ) %>%
+    dplyr::filter(!is.na(.data$fsym_id)) %>%
+    dplyr::filter(!is.na(.data$factset_entity_id)) %>%
+    dplyr::select("fsym_id", "factset_entity_id") %>%
+    dplyr::distinct()
+
+
+  # get the relevant icc_security_id to factset_entity_id table --------------
+
+  icc_security_id <-
+    dplyr::tbl(conn, "icc_v2_icc_factset_id_map") %>%
+    dplyr::filter(.data$provider_id_type == "icc_security_id") %>%
+    dplyr::filter(.data$factset_id_type == "fsym_security_id") %>%
+    dplyr::filter(!is.na(.data$factset_id)) %>%
+    # do not use a fsym_id that was started in the current year to avoid data
+    # based on a partial year
+    dplyr::filter(.data$id_start_date < sql_filter_date) %>%
+    # end_date identifies the date the identifier was last associated with
+    # fsym_id, i.e. if there is no end_date (end_date == NA) then the
+    # association is still valid
+    dplyr::filter(
+      .data$id_end_date >= sql_filter_date | is.na(.data$id_end_date)
+    ) %>%
+    dplyr::select(icc_security_id = "provider_id", fsym_id = "factset_id") %>%
+    dplyr::inner_join(fsym_id__factset_entity_id, by = "fsym_id") %>%
+    dplyr::select("icc_security_id", "factset_entity_id") %>%
+    dplyr::distinct()
+
+
+  # get the factset_entity_id to icc_total_emissions data --------------------
+
+  icc_total_emissions <-
+    dplyr::tbl(conn, "icc_v2_icc_carbon_climate_core") %>%
+    dplyr::filter(.data$icc_emissions_fiscal_year == .env$reporting_year) %>%
+    dplyr::group_by(.data$icc_security_id, .data$icc_emissions_fiscal_year) %>%
+    # icc_archive_date marks the date a data point was submitted, and
+    # sometimes there are updates of previous data submissions, so we need to
+    # dplyr::filter only for the most recent submission
+    dplyr::filter(
+      .data$icc_archive_date == max(.data$icc_archive_date, na.rm = TRUE)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::group_by(.data$icc_company_id, .data$icc_emissions_fiscal_year) %>%
+    dplyr::filter(
+      .data$icc_archive_date == max(.data$icc_archive_date, na.rm = TRUE)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::filter(
+      .data$icc_emissions_estimated_trust > min_estimated_trust |
+        .data$icc_emissions_reported_trust > min_reported_trust
+    ) %>%
+    dplyr::select(
+      "icc_security_id",
+      "icc_total_emissions",
+      "icc_scope_3_emissions"
+    ) %>%
+    dplyr::inner_join(icc_security_id, by = "icc_security_id") %>%
+    dplyr::select(
+      "factset_entity_id",
+      "icc_total_emissions",
+      "icc_scope_3_emissions"
+    )
+
+  # collect the data ----------------------------------------------------------
+
+  logger::log_trace("Downloading emissions data.")
+  icc_total_emissions <-
+    icc_total_emissions %>%
+    dplyr::collect()
+
+  # return the factset_entity_id to icc_total_emissions data -----------------
+
+  return(icc_total_emissions)
+}
diff --git a/R/workflow.factset-package.R b/R/workflow.factset-package.R
new file mode 100644
index 0000000..4293484
--- /dev/null
+++ b/R/workflow.factset-package.R
@@ -0,0 +1,9 @@
+#' @keywords internal
+"_PACKAGE"
+
+## usethis namespace: start
+#' @importFrom dplyr %>%
+#' @importFrom rlang .data
+#' @importFrom rlang .env
+## usethis namespace: end
+NULL
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1a81e56
--- /dev/null
+++ b/README.md
@@ -0,0 +1,17 @@
+# workflow.factset
+
+## Running container
+
+```sh
+docker run -i -t --rm --env-file=.env -v ./foo:/mnt/factset-data IMAGE_NAME
+```
+
+## Deploying to Azure
+
+```sh
+# change this value as needed.
+RESOURCEGROUP="myResourceGroup"
+
+# run from repo root
+
+az deployment group create --resource-group "$RESOURCEGROUP" --template-file azure-deploy.json --parameters @azure-deploy.parameters.json
+
+```
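+
+## Running locally
+
+The export can also be run from an R session, without Docker. A minimal
+sketch, assuming the `PG*` connection variables and `EXPORT_DESTINATION`
+are set in the environment (see `example.env`):
+
+```r
+# illustrative values; see example.env for the full set of variables
+workflow.factset::export_pacta_files(
+  destination = Sys.getenv("EXPORT_DESTINATION"),
+  data_timestamp = "2023-01-23"
+)
+```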
diff --git a/azure-deploy.json b/azure-deploy.json
new file mode 100644
index 0000000..6a1a685
--- /dev/null
+++ b/azure-deploy.json
@@ -0,0 +1,160 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
+  "contentVersion": "0.0.0.5",
+
+  "parameters": {
+    "location": {
+      "type": "string",
+      "defaultValue": "[resourceGroup().location]",
+      "metadata": {
+        "description": "Location for all resources."
+      }
+    },
+    "identity": {
+      "type": "string",
+      "metadata": {
+        "description": "The ID of the user assigned identity to use for the container group."
+      }
+    },
+    "containerGroupName": {
+      "type": "string",
+      "metadata": {
+        "description": "The name of the container group."
+      }
+    },
+    "restartPolicy": {
+      "type": "string",
+      "defaultValue": "OnFailure",
+      "allowedValues": [
+        "Always",
+        "Never",
+        "OnFailure"
+      ],
+      "metadata": {
+        "description": "The behavior of Azure runtime if container has stopped."
+      }
+    },
+    "rawdata-storageaccountkey": {
+      "type": "securestring",
+      "metadata": {
+        "description": "The storage account key for the rawdata storage account."
+      }
+    },
+    "database-password": {
+      "type": "securestring",
+      "metadata": {
+        "description": "password to connect to database"
+      }
+    },
+    "starttime": {
+      "type": "string",
+      "defaultValue": "[utcNow()]",
+      "metadata": {
+        "description": "The time to start the container group."
+      }
+    }
+  },
+
+  "variables": {
+    "PGDATABASE": "FDS",
+    "PGHOST": "[concat('factset-01-postgres', '.postgres.database.azure.com')]",
+    "PGUSER": "postgres",
+    "containerregistry": "ghcr.io/rmi-pacta",
+    "machineCpuCores": 1,
+    "machineMemoryInGB": 16,
+    "mountPathExport": "/mnt/factset-extracted"
+  },
+
+  "functions": [],
+
+  "resources": [
+    {
+      "type": "Microsoft.ContainerInstance/containerGroups",
+      "apiVersion": "2021-09-01",
+      "name": "[parameters('containerGroupName')]",
+      "location": "[parameters('location')]",
+      "identity": {
+        "type": "UserAssigned",
+        "userAssignedIdentities": {
+          "[parameters('identity')]": {}
+        }
+      },
+      "properties": {
+        "containers": [
+          {
+            "name": "loader-runner",
+            "properties": {
+              "image": "[concat(variables('containerregistry'),'/workflow.factset:pr1')]",
+              "ports": [],
+              "resources": {
+                "requests": {
+                  "cpu": "[variables('machineCpuCores')]",
+                  "memoryInGB": "[variables('machineMemoryInGB')]"
+                }
+              },
+              "environmentVariables": [
+                {
+                  "name": "PGUSER",
+                  "value": "[variables('PGUSER')]"
+                },
+                {
+                  "name": "PGPASSWORD",
+                  "secureValue": "[parameters('database-password')]"
+                },
+                {
+                  "name": "PGHOST",
+                  "value": "[variables('PGHOST')]"
+                },
+                {
+                  "name": "PGDATABASE",
+                  "value": "[variables('PGDATABASE')]"
+                },
+                {
+                  "name": "DEPLOY_START_TIME",
+                  "value": "[parameters('starttime')]"
+                },
+                {
+                  "name": "MACHINE_CORES",
+                  "value": "[string(variables('machineCpuCores'))]"
+                },
+                {
+                  "name": "LOG_LEVEL",
+                  "value": "TRACE"
+                },
+                {
+                  "name": "EXPORT_DESTINATION",
+                  "value": "[variables('mountPathExport')]"
+                },
+                {
+                  "name": "DATA_TIMESTAMP",
+                  "value": "20230123"
+                }
+              ],
+              "volumeMounts": [
+                {
+                  "name": "factset-extracted",
+                  "mountPath": "[variables('mountPathExport')]"
+                }
+              ]
+            }
+          }
+        ],
+        "restartPolicy": "[parameters('restartPolicy')]",
+        "osType": "Linux",
+        "volumes": [
+          {
+            "name": "factset-extracted",
+            "azureFile": {
+              "shareName": "factset-extracted",
+              "readOnly": false,
+              "storageAccountName": "pactarawdata",
+              "storageAccountKey": "[parameters('rawdata-storageaccountkey')]"
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "outputs": {}
+}
diff --git a/example.env b/example.env
new file mode 100644
index 0000000..e615517
--- /dev/null
+++ b/example.env
@@ -0,0 +1,7 @@
+DEPLOY_START_TIME=20000101T000001
+EXPORT_DESTINATION=/mnt/factset-data
+LOG_LEVEL=TRACE
+PGDATABASE=FDS
+PGHOST=postgres.example.com
+PGPASSWORD=SuperSecretPassw0rd
+PGUSER=postgres
diff --git a/man/connect_factset_db.Rd b/man/connect_factset_db.Rd
new file mode 100644
index 0000000..ee82e71
--- /dev/null
+++ b/man/connect_factset_db.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/connect_factset_db.R
+\name{connect_factset_db}
+\alias{connect_factset_db}
+\title{Connect to the FactSet database}
+\usage{
+connect_factset_db(
+  dbname = Sys.getenv("PGDATABASE"),
+  host = Sys.getenv("PGHOST"),
+  port = Sys.getenv("PGPORT", 5432L),
+  options = "-c search_path=fds",
+  username = Sys.getenv("PGUSER"),
+  password = Sys.getenv("PGPASSWORD")
+)
+}
+\arguments{
+\item{dbname}{name of the database to connect to}
+
+\item{host}{hostname of the server to connect to}
+
+\item{port}{port number of the server to connect to}
+
+\item{options}{additional options to pass to the database connection.
+Typically used to define schema search path.}
+
+\item{username}{username to use for the database connection}
+
+\item{password}{password to use for the database connection}
+}
+\value{
+a database connection object
+}
+\description{
+Connect to the FactSet database
+}
diff --git a/man/export_pacta_files.Rd b/man/export_pacta_files.Rd
new file mode 100644
index 0000000..a11143e
--- /dev/null
+++ b/man/export_pacta_files.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/export_pacta_files.R
+\name{export_pacta_files}
+\alias{export_pacta_files}
+\title{Export files for use in PACTA data preparation}
+\usage{
+export_pacta_files(
+  conn = connect_factset_db(),
+  destination = file.path(Sys.getenv("EXPORT_DESTINATION")),
+  data_timestamp = Sys.getenv("DATA_TIMESTAMP", Sys.time()),
+  terminate_connection = (deparse(substitute(conn)) ==
+    formals(export_pacta_files)[["conn"]])
+)
+}
+\arguments{
+\item{conn}{database connection object, such as returned by
+\code{\link[=connect_factset_db]{connect_factset_db()}}}

+\item{destination}{path to directory where exported files will be saved}
+
+\item{data_timestamp}{filter data as-of this timestamp}
+
+\item{terminate_connection}{if \code{TRUE}, disconnect \code{conn} before
+returning. Defaults to \code{TRUE} when the connection was created by this
+function.}
+}
+\value{
+vector of paths to exported files
+}
+\description{
+Export files for use in PACTA data preparation
+}
diff --git a/man/get_entity_financing_data.Rd b/man/get_entity_financing_data.Rd
new file mode 100644
index 0000000..1cfb624
--- /dev/null
+++ b/man/get_entity_financing_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_entity_financing_data.R
+\name{get_entity_financing_data}
+\alias{get_entity_financing_data}
+\title{Get the entity financing data from the FactSet database and prepare the
+\code{factset_entity_financing_data} tibble}
+\usage{
+get_entity_financing_data(conn, data_timestamp)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{data_timestamp}{A single string specifying the desired date for the
+data in the form "2021-12-31"}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_entity_financing_data.rds} output file
+}
+\description{
+Get the entity financing data from the FactSet database and prepare the
+\code{factset_entity_financing_data} tibble
+}
diff --git a/man/get_entity_info.Rd b/man/get_entity_info.Rd
new file mode 100644
index 0000000..564b5d4
--- /dev/null
+++ b/man/get_entity_info.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_entity_info.R
+\name{get_entity_info}
+\alias{get_entity_info}
+\title{Get the entity info data from the FactSet database and prepare the
+\code{factset_entity_info} tibble}
+\usage{
+get_entity_info(conn)
+}
+\arguments{
+\item{conn}{database connection}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_entity_info.rds} output file
+}
+\description{
+Get the entity info data from the FactSet database and prepare the
+\code{factset_entity_info} tibble
+}
diff --git a/man/get_financial_data.Rd b/man/get_financial_data.Rd
new file mode 100644
index 0000000..efc14ae
--- /dev/null
+++ b/man/get_financial_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_financial_data.R
+\name{get_financial_data}
+\alias{get_financial_data}
+\title{Get the factset financial data from the FactSet database and prepare the
+\code{factset_financial_data} tibble}
+\usage{
+get_financial_data(conn, data_timestamp)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{data_timestamp}{A single string specifying the desired date for the
+data in the form "2021-12-31"}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_financial_data.rds} output file
+}
+\description{
+Get the factset financial data from the FactSet database and prepare the
+\code{factset_financial_data} tibble
+}
diff --git a/man/get_fund_data.Rd b/man/get_fund_data.Rd
new file mode 100644
index 0000000..c6113a1
--- /dev/null
+++ b/man/get_fund_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_fund_data.R
+\name{get_fund_data}
+\alias{get_fund_data}
+\title{Get the fund data from the FactSet database and prepare the
+\code{factset_fund_data} tibble}
+\usage{
+get_fund_data(conn, data_timestamp)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{data_timestamp}{A single string specifying the desired date for the
+data in the form "2021-12-31"}
+}
+\value{
+A tibble properly prepared to be saved as the \code{factset_fund_data.rds}
+output file
+}
+\description{
+Get the fund data from the FactSet database and prepare the
+\code{factset_fund_data} tibble
+}
diff --git a/man/get_isin_to_fund_table.Rd b/man/get_isin_to_fund_table.Rd
new file mode 100644
index 0000000..2112e16
--- /dev/null
+++ b/man/get_isin_to_fund_table.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_isin_to_fund_table.R
+\name{get_isin_to_fund_table}
+\alias{get_isin_to_fund_table}
+\title{Get the isin_to_fund_table data from the FactSet database and prepare the
+\code{factset_isin_to_fund_table} tibble}
+\usage{
+get_isin_to_fund_table(conn)
+}
+\arguments{
+\item{conn}{database connection}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_isin_to_fund_table.rds} output file
+}
+\description{
+Get the isin_to_fund_table data from the FactSet database and prepare the
+\code{factset_isin_to_fund_table} tibble
+}
diff --git a/man/get_iss_emissions_data.Rd b/man/get_iss_emissions_data.Rd
new file mode 100644
index 0000000..ab78fd4
--- /dev/null
+++ b/man/get_iss_emissions_data.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_iss_emissions_data.R
+\name{get_iss_emissions_data}
+\alias{get_iss_emissions_data}
+\title{Get the ISS emissions data from the FactSet database and prepare the
+\code{factset_iss_emissions} tibble}
+\usage{
+get_iss_emissions_data(
+  conn,
+  reporting_year,
+  min_estimated_trust = 0,
+  min_reported_trust = 0
+)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{reporting_year}{A single numeric specifying the year of data to be
+returned}
+
+\item{min_estimated_trust}{A single numeric specifying the minimum allowed
+"estimated trust" value}
+
+\item{min_reported_trust}{A single numeric specifying the minimum allowed
+"reported trust" value}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_iss_emissions.rds} output file
+}
+\description{
+Get the ISS emissions data from the FactSet database and prepare the
+\code{factset_iss_emissions} tibble
+}
diff --git a/man/workflow.factset-package.Rd b/man/workflow.factset-package.Rd
new file mode 100644
index 0000000..ba4560a
--- /dev/null
+++ b/man/workflow.factset-package.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/workflow.factset-package.R
+\docType{package}
+\name{workflow.factset-package}
+\alias{workflow.factset}
+\alias{workflow.factset-package}
+\title{workflow.factset: Extract Financial Data for use in PACTA}
+\description{
+Extract data from a FactSet Postgres database for use as part of PACTA Data Preparation
+}
+\author{
+\strong{Maintainer}: CJ Yetman \email{cj@cjyetman.com} (\href{https://orcid.org/0000-0001-5099-9500}{ORCID}) [contractor]
+
+Authors:
+\itemize{
+  \item Jackson Hoffart \email{jackson.hoffart@gmail.com} (\href{https://orcid.org/0000-0002-8600-5042}{ORCID}) [contractor]
+  \item Jacob Kastl \email{jacob.kastl@gmail.com} [contractor]
+  \item Alex Axthelm \email{aaxthelm@rmi.org} (\href{https://orcid.org/0000-0001-8579-8565}{ORCID}) [contractor]
+}
+
+Other contributors:
+\itemize{
+  \item RMI \email{PACTA4investors@rmi.org} [copyright holder, funder]
+}
+
+}
+\keyword{internal}