diff --git a/.Rbuildignore b/.Rbuildignore index 5163d0b..d32b58f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,4 @@ ^LICENSE\.md$ +.git/ +.github/ +^\.github$ diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c49182c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +.git/ +.github/ +Dockerfile diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/build-Docker-image-nightly.yml b/.github/workflows/build-Docker-image-nightly.yml new file mode 100644 index 0000000..7ffa64f --- /dev/null +++ b/.github/workflows/build-Docker-image-nightly.yml @@ -0,0 +1,12 @@ +on: + schedule: + - cron: '0 0 * * 1,2,3,4,5' + +jobs: + build_docker_image: + name: "Call build and push action" + uses: ./.github/workflows/build-and-push-Docker-image.yml + secrets: inherit + with: + image-name: workflow.factset + image-tag: nightly diff --git a/.github/workflows/build-Docker-image-on-push-to-main.yml b/.github/workflows/build-Docker-image-on-push-to-main.yml new file mode 100644 index 0000000..b75fca6 --- /dev/null +++ b/.github/workflows/build-Docker-image-on-push-to-main.yml @@ -0,0 +1,12 @@ +on: + push: + branches: [main] + +jobs: + build_docker_image: + name: "Call build and push action" + uses: ./.github/workflows/build-and-push-Docker-image.yml + secrets: inherit + with: + image-name: workflow.factset + image-tag: main diff --git a/.github/workflows/build-Docker-image-on-push-to-pr.yml b/.github/workflows/build-Docker-image-on-push-to-pr.yml new file mode 100644 index 0000000..16934bb --- /dev/null +++ b/.github/workflows/build-Docker-image-on-push-to-pr.yml @@ -0,0 +1,37 @@ +on: + pull_request: + +jobs: + build_docker_image: + name: "Call build and push action" + uses: ./.github/workflows/build-and-push-Docker-image.yml + secrets: inherit + with: + image-name: workflow.factset + image-tag: pr${{ github.event.pull_request.number }} + + add_comment: + needs: build_docker_image + runs-on: ubuntu-latest + steps: + - name: Find Comment + # https://github.com/peter-evans/find-comment + uses: peter-evans/find-comment@v2 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: Docker image from this PR + + - name: Create or update comment + # https://github.com/peter-evans/create-or-update-comment + uses: peter-evans/create-or-update-comment@v3 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + Docker image from this PR (${{ github.event.pull_request.head.sha }}) created + ``` + docker pull ${{ needs.build_docker_image.outputs.full-image-name }} + ``` + edit-mode: replace diff --git a/.github/workflows/build-and-push-Docker-image.yml b/.github/workflows/build-and-push-Docker-image.yml new file mode 100644 index 0000000..b6d8e1e --- /dev/null +++ b/.github/workflows/build-and-push-Docker-image.yml @@ -0,0 +1,67 @@ +--- +name: Build and push docker image + +on: + workflow_call: + inputs: + image-name: + required: true + type: string + image-tag: + required: true + type: string + outputs: + full-image-name: + description: "Full pushed image name including host/registry, name, and tag" + value: ${{ jobs.docker.outputs.full-image-name }} + +jobs: + docker: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + timeout-minutes: 25 + outputs: + full-image-name: ${{ 
steps.image-name.outputs.full-image-name }} + + steps: + + - name: Define image name + id: image-name + run: | + full_image_name="ghcr.io/${{ github.repository_owner }}/${{ inputs.image-name }}:${{ inputs.image-tag }}" + full_image_name=$(echo $full_image_name | tr '[A-Z]' '[a-z]') + echo "full-image-name=$full_image_name" >> "$GITHUB_OUTPUT" + echo "$full_image_name" > full-image-name + + - uses: actions/upload-artifact@v3 + with: + name: full-image-name + path: . + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + tags: ${{ steps.image-name.outputs.full-image-name }} + cache-from: type=gha + cache-to: type=gha,mode=min + no-cache-filters: install-pacta + + check-system-dependencies: + name: "Check System Dependencies" + needs: docker + uses: ./.github/workflows/check-R-sysdeps.yml + with: + image: ${{ needs.docker.outputs.full-image-name }} \ No newline at end of file diff --git a/.github/workflows/check-R-sysdeps.yml b/.github/workflows/check-R-sysdeps.yml new file mode 100644 index 0000000..3a1c08b --- /dev/null +++ b/.github/workflows/check-R-sysdeps.yml @@ -0,0 +1,32 @@ +--- +name: Check R system dependencies + +on: + workflow_call: + inputs: + image: + required: true + type: string + +jobs: + + check-system-dependencies: + runs-on: ubuntu-latest + steps: + - name: 'Pull image' + run: | + echo ${{ inputs.image }} + docker pull ${{ inputs.image }} + - name: 'Run pak::sysreqs_check_installed()' + run: | + + docker run \ + --rm \ + --entrypoint "/bin/sh" \ + ${{ inputs.image }} \ + -c "Rscript -e ' + x <- pak::sysreqs_check_installed() + print(x) + is_installed <- as.data.frame(x)[[\"installed\"]] + stopifnot(all(is_installed)) + '" diff --git a/.github/workflows/lint-package.yaml b/.github/workflows/lint-package.yaml new file mode 100644 index 0000000..f4c4ef2 --- /dev/null +++ b/.github/workflows/lint-package.yaml @@ -0,0 +1,32 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: lint + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. 
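+          # note: `needs: lint` additionally installs any packages listed
+          # under a `Config/Needs/lint` field in DESCRIPTION, if present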
+          needs: lint
+
+      - name: Lint
+        run: lintr::lint_package()
+        shell: Rscript {0}
+        env:
+          LINTR_ERROR_ON_LINT: true
diff --git a/.github/workflows/run-hadolint.yml b/.github/workflows/run-hadolint.yml
new file mode 100644
index 0000000..0f07812
--- /dev/null
+++ b/.github/workflows/run-hadolint.yml
@@ -0,0 +1,11 @@
+---
+on: [push, pull_request]
+
+jobs:
+  hadolint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: hadolint/hadolint-action@v3.1.0
+        with:
+          dockerfile: Dockerfile
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e88cb47
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.env
+azure-deploy.parameters.json
diff --git a/DESCRIPTION b/DESCRIPTION
index 03ec3c5..4a5c654 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -30,3 +30,14 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
+Imports:
+    DBI,
+    dbplyr,
+    dplyr,
+    logger,
+    lubridate,
+    rlang,
+    RPostgres,
+    withr
+Suggests:
+    rstudioapi
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..1d8a5d2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,69 @@
+# using rocker r-ver as a base with R 4.3.1
+# https://hub.docker.com/r/rocker/r-ver
+# https://rocker-project.org/images/versioned/r-ver.html
+#
+# sets CRAN repo to use Posit Package Manager to freeze R package versions to
+# those available on 2023-10-30
+# https://packagemanager.posit.co/client/#/repos/2/overview
+# https://packagemanager.posit.co/cran/__linux__/jammy/2023-10-30
+
+# set proper base image
+ARG R_VERS="4.3.1"
+FROM rocker/r-ver:$R_VERS AS base
+
+# set Docker image labels
+LABEL org.opencontainers.image.source=https://github.com/RMI-PACTA/workflow.factset
+LABEL org.opencontainers.image.description="Extract FactSet Data for use in PACTA"
+LABEL org.opencontainers.image.licenses=MIT
+LABEL org.opencontainers.image.title=""
+LABEL org.opencontainers.image.revision=""
+LABEL org.opencontainers.image.version=""
+LABEL org.opencontainers.image.vendor=""
+LABEL org.opencontainers.image.base.name=""
+LABEL org.opencontainers.image.ref.name=""
+LABEL org.opencontainers.image.authors=""
+
+# set apt-get to noninteractive mode
+ARG DEBIAN_FRONTEND="noninteractive"
+ARG DEBCONF_NOWARNINGS="yes"
+
+RUN groupadd -r runner-workflow-factset \
+    && useradd -r -g runner-workflow-factset runner-workflow-factset \
+    && mkdir -p /home/runner-workflow-factset \
+    && chown -R runner-workflow-factset /home/runner-workflow-factset
+WORKDIR /home/runner-workflow-factset
+
+# install system dependencies
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        libicu-dev=70.* \
+        libpq-dev=14.* \
+    && chmod -R a+rwX /root \
+    && rm -rf /var/lib/apt/lists/*
+
+# set frozen CRAN repo
+ARG CRAN_REPO="https://packagemanager.posit.co/cran/__linux__/jammy/2023-10-30"
+RUN echo "options(repos = c(CRAN = '$CRAN_REPO'), pkg.sysreqs = FALSE)" >> "${R_HOME}/etc/Rprofile.site" \
+    # install packages for dependency resolution and installation
+    && Rscript -e "install.packages(c('pak', 'jsonlite'))"
+
+# Install R dependencies
+COPY DESCRIPTION /workflow.factset/DESCRIPTION
+
+# install R package dependencies
+RUN Rscript -e "\
+    deps <- pak::local_install_deps(root = '/workflow.factset'); \
+    "
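+
+# NOTE: copying DESCRIPTION and installing dependencies before copying the
+# rest of the repo lets Docker cache the (slow) dependency-install layer.
+# A local build of this image would look like (tag name is illustrative):
+#   docker build -t workflow.factset:local .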
+
+# copy in everything from this repo
+COPY . /workflow.factset
+
+# install the R package itself
+RUN Rscript -e "\
+    pak::pkg_install('local::/workflow.factset'); \
+    "
+
+USER runner-workflow-factset
+
+# set default run behavior
+CMD ["Rscript", "-e", "logger::log_threshold(Sys.getenv('LOG_LEVEL', 'INFO'));workflow.factset::export_pacta_files()"]
diff --git a/NAMESPACE b/NAMESPACE
index 6ae9268..6b2e361 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,13 @@
 # Generated by roxygen2: do not edit by hand
+export(connect_factset_db)
+export(export_pacta_files)
+export(get_entity_financing_data)
+export(get_entity_info)
+export(get_financial_data)
+export(get_fund_data)
+export(get_isin_to_fund_table)
+export(get_iss_emissions_data)
+importFrom(dplyr,"%>%")
+importFrom(rlang,.data)
+importFrom(rlang,.env)
diff --git a/R/connect_factset_db.R b/R/connect_factset_db.R
new file mode 100644
index 0000000..af938cd
--- /dev/null
+++ b/R/connect_factset_db.R
@@ -0,0 +1,107 @@
+#' Connect to the FactSet database
+#'
+#' @param dbname name of the database to connect to
+#' @param host hostname of the server to connect to
+#' @param port port number of the server to connect to
+#' @param options additional options to pass to the database connection.
+#' Typically used to define schema search path.
+#' @param username username to use for the database connection
+#' @param password password to use for the database connection
+#'
+#' @return a database connection object
+#'
+#' @export
+
+connect_factset_db <- function(
+  dbname = Sys.getenv("PGDATABASE"),
+  host = Sys.getenv("PGHOST"),
+  port = Sys.getenv("PGPORT", 5432L),
+  options = "-c search_path=fds",
+  username = Sys.getenv("PGUSER"),
+  password = Sys.getenv("PGPASSWORD")
+) {
+
+  if (username == "") {
+    logger::log_error(
+      "No database username could be found. ",
+      "Please set the username as an environment variable"
+    )
+    stop("No database username could be found.")
+  }
+
+  if (password == "") {
+    logger::log_error(
+      "No database password could be found. ",
+      "Please set the password as an environment variable"
+    )
+    stop("No database password could be found.")
+  }
+
+  logger::log_trace(
+    "Connecting to database {dbname} on {host}:{port} as {username}"
+  )
+  conn <-
+    DBI::dbConnect(
+      drv = RPostgres::Postgres(),
+      dbname = dbname,
+      host = host,
+      port = port,
+      user = username,
+      password = password,
+      options = options
+    )
+
+  reg_conn_finalizer(conn, DBI::dbDisconnect, parent.frame())
+}
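+
+# Example (illustrative): a connection opened inside a function is closed
+# automatically, with a warning, when that function's frame exits:
+#
+#   get_table_names <- function() {
+#     conn <- connect_factset_db()
+#     DBI::dbListTables(conn)
+#   }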
+
+# connection finalizer to ensure connection is closed --------------------------
+# adapted from: https://shrektan.com/post/2019/07/26/create-a-database-connection-that-can-be-disconnected-automatically/ #nolint
+
+reg_conn_finalizer <- function(
+  conn,
+  close_fun,
+  envir
+) {
+  is_parent_global <- identical(.GlobalEnv, envir)
+
+  if (isTRUE(is_parent_global)) {
+    env_finalizer <- new.env(parent = emptyenv())
+    env_finalizer$conn <- conn
+    attr(conn, "env_finalizer") <- env_finalizer
+
+    reg.finalizer(env_finalizer, function(e) {
+      if (DBI::dbIsValid(e$conn)) {
+        warn_db_autoclose(e$conn)
+        try(close_fun(e$conn))
+      }
+    },
+    onexit = TRUE
+    )
+  } else {
+    withr::defer(
+      {
+        if (DBI::dbIsValid(conn)) {
+          warn_db_autoclose(conn)
+          try(close_fun(conn))
+        }
+      },
+      envir = envir,
+      priority = "last"
+    )
+  }
+
+  logger::log_trace("Database connection registered for finalization")
+  return(conn)
+}
+
+warn_db_autoclose <- function(conn) {
+  dbname <- DBI::dbGetInfo(conn)$dbname
+  host <- DBI::dbGetInfo(conn)$host
+  logger::log_warn(
+    "The database connection to ",
+    dbname,
+    " on ",
+    host,
+    " was closed automatically ",
+    "because the calling environment was closed."
+  )
+}
diff --git a/R/export_pacta_files.R b/R/export_pacta_files.R
new file mode 100644
index 0000000..c11eb0c
--- /dev/null
+++ b/R/export_pacta_files.R
@@ -0,0 +1,162 @@
+#' Export files for use in PACTA data preparation
+#'
+#' @param conn database connection object, such as returned by
+#' [connect_factset_db()]
+#' @param destination path to directory where exported files will be saved
+#' @param data_timestamp filter data as-of this timestamp
+#' @param terminate_connection if `TRUE`, disconnect `conn` before returning.
+#' Defaults to `TRUE` when the connection was created by this function.
+#'
+#' @return vector of paths to exported files
+#'
+#' @export
+
+export_pacta_files <- function(
+  conn = connect_factset_db(),
+  destination = file.path(Sys.getenv("EXPORT_DESTINATION")),
+  data_timestamp = Sys.getenv("DATA_TIMESTAMP", Sys.time()),
+  terminate_connection = (
+    # Terminate connection if it was created by this function.
+    deparse(substitute(conn)) == formals(export_pacta_files)[["conn"]]
+  )
+) {
+
+  # Prepare output directories
+
+  if (!dir.exists(destination)) {
+    logger::log_error(
+      "The destination directory {destination} does not exist."
+    )
+    stop("Destination directory does not exist.")
+  }
+
+  if (Sys.getenv("DEPLOY_START_TIME") == "") {
+    logger::log_warn(
+      "The environment variable DEPLOY_START_TIME is not set. ",
+      "Using current system time as start time."
+    )
+  }
+
+  start_time_chr <- Sys.getenv(
+    "DEPLOY_START_TIME",
+    format(Sys.time(), format = "%Y%m%dT%H%M%S", tz = "UTC")
+  )
+
+  if (inherits(data_timestamp, "character")) {
+    data_timestamp <- lubridate::ymd_hms(
+      data_timestamp,
+      quiet = TRUE,
+      tz = "UTC",
+      truncated = 3
+    )
+  }
+
+  if (inherits(data_timestamp, "POSIXct")) {
+    data_timestamp_chr <- format(
+      data_timestamp,
+      format = "%Y%m%dT%H%M%S",
+      tz = "UTC"
+    )
+  } else {
+    logger::log_error(
+      "The data_timestamp argument must be a POSIXct object ",
+      "or a character string coercible to POSIXct format",
+      " (using lubridate::ymd_hms(truncated = 3))."
+    )
+    stop("Invalid data_timestamp argument.")
+  }
+
+  export_dir <- file.path(
+    destination,
+    paste0(data_timestamp_chr, "_pulled", start_time_chr)
+  )
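+  # e.g. <destination>/20231231T000000_pulled20240101T120000 (illustrative)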
+
+  if (!dir.exists(export_dir)) {
+    dir.create(export_dir, recursive = TRUE)
+  }
+
+  # Start Extracting Data
+
+  financial_data_path <- file.path(
+    export_dir,
+    "factset_financial_data.rds"
+  )
+  logger::log_info("Fetching financial data.")
+  financial_data <- get_financial_data(
+    conn = conn,
+    data_timestamp = data_timestamp
+  )
+  logger::log_info("Exporting financial data to {financial_data_path}")
+  saveRDS(object = financial_data, file = financial_data_path)
+
+  entity_info_path <- file.path(export_dir, "factset_entity_info.rds")
+  logger::log_info("Fetching entity info data.")
+  entity_info <- get_entity_info(conn = conn)
+  logger::log_info("Exporting entity info data to {entity_info_path}")
+  saveRDS(object = entity_info, file = entity_info_path)
+
+  entity_financing_data_path <- file.path(
+    export_dir,
+    "factset_entity_financing_data.rds"
+  )
+  logger::log_info("Fetching entity financing data.")
+  entity_financing_data <- get_entity_financing_data(
+    conn = conn,
+    data_timestamp = data_timestamp
+  )
+  logger::log_info(
+    "Exporting entity financing data to {entity_financing_data_path}"
+  )
+  saveRDS(
+    object = entity_financing_data,
+    file = entity_financing_data_path
+  )
+
+  fund_data_path <- file.path(export_dir, "factset_fund_data.rds")
+  logger::log_info("Fetching fund data.")
+  fund_data <- get_fund_data(conn = conn, data_timestamp = data_timestamp)
+  logger::log_info("Exporting fund data to {fund_data_path}")
+  saveRDS(object = fund_data, file = fund_data_path)
+
+  isin_to_fund_table_path <- file.path(
+    export_dir,
+    "factset_isin_to_fund_table.rds"
+  )
+  logger::log_info("Fetching ISIN to fund table.")
+  isin_to_fund_table <- get_isin_to_fund_table(conn = conn)
+  logger::log_info(
+    "Exporting ISIN to fund table to {isin_to_fund_table_path}"
+  )
+  saveRDS(object = isin_to_fund_table, file = isin_to_fund_table_path)
+
+  iss_emissions_path <- file.path(
+    export_dir,
+    "factset_iss_emissions.rds"
+  )
+  logger::log_info("Fetching ISS emissions data.")
+  iss_emissions <- get_iss_emissions_data(
+    conn = conn,
+    reporting_year = lubridate::year(data_timestamp)
+  )
+  logger::log_info(
+    "Exporting ISS emissions data to {iss_emissions_path}"
+  )
+  saveRDS(object = iss_emissions, file = iss_emissions_path)
+
+
+  logger::log_info("Done with data export.")
+
+  # Terminate connection if needed
+  if (terminate_connection) {
+    logger::log_info("Terminating database connection.")
+    DBI::dbDisconnect(conn)
+  }
+
+  return(
+    invisible(
+      c(
+        financial_data_path = financial_data_path,
+        entity_info_path = entity_info_path,
+        entity_financing_data_path = entity_financing_data_path,
+        fund_data_path = fund_data_path,
+        isin_to_fund_table_path = isin_to_fund_table_path,
+        iss_emissions_path = iss_emissions_path
+      )
+    )
+  )
+}
diff --git a/R/get_entity_financing_data.R b/R/get_entity_financing_data.R
new file mode 100644
index 0000000..2afe796
--- /dev/null
+++ b/R/get_entity_financing_data.R
@@ -0,0 +1,96 @@
+#' Get the entity financing data from the FactSet database and prepare the
+#' `factset_entity_financing_data` tibble
+#'
+#' @param conn database connection
+#' @param data_timestamp A single string specifying the desired date for the
+#' data in the form "2021-12-31"
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_entity_financing_data.rds` output file
+#'
+#' @export
+
+get_entity_financing_data <- function(
+  conn,
+  data_timestamp
+) {
+  # get fsym_id to fundamentals fsym_company_id --------------------------------
+
+  logger::log_debug("Extracting entity financing info from database.")
+  logger::log_debug("using data timestamp: ", data_timestamp)
+
+  logger::log_trace("Accessing security map - FactSet Fundamentals.")
+  ff_fsym_company_id <- dplyr::tbl(conn, "ff_v3_ff_sec_map")
+
+  logger::log_trace("Accessing security map - FactSet Ownership.")
+  own_fsym_company_id <- dplyr::tbl(conn, "own_v5_own_sec_map")
+
+  logger::log_trace("UNIONing security maps.")
+  fsym_company_id <- dplyr::union_all(
+    ff_fsym_company_id,
+    own_fsym_company_id
+  )
+
+
+  # get fsym_id to factset_entity_id -------------------------------------------
+
+  logger::log_trace("Accessing security to entity map - FactSet Fundamentals.")
+  ff_sec_entity <- dplyr::tbl(conn, "ff_v3_ff_sec_entity")
+
+  logger::log_trace("Accessing security to entity map - FactSet Ownership.")
+  own_sec_entity <- dplyr::tbl(conn, "own_v5_own_sec_entity")
+
+  logger::log_trace("UNIONing security to entity maps.")
+  sec_entity <- dplyr::union_all(
+    ff_sec_entity,
+    own_sec_entity
+  )
+
+
+  # get market value data ------------------------------------------------------
+
+  logger::log_trace("Accessing market value data.")
+  ff_mkt_val <- dplyr::tbl(conn, "ff_v3_ff_basic_der_af") %>%
+    dplyr::select("fsym_id", "date", "currency", "ff_mkt_val")
+
+
+  # get debt outstanding data --------------------------------------------------
+
+  logger::log_trace("Accessing balance sheet data.")
+  ff_debt <- dplyr::tbl(conn, "ff_v3_ff_basic_af") %>%
+    dplyr::select("fsym_id", "date", "currency", "ff_debt")
+
+
+  # merge and collect the data -------------------------------------------------
+
+  logger::log_trace("Merging entity financing data.")
+  entity_financing_data <- ff_mkt_val %>%
+    dplyr::full_join(
+      ff_debt,
+      by = c("fsym_id", "date", "currency")
+    ) %>%
+    dplyr::left_join(fsym_company_id, by = "fsym_id") %>%
+    dplyr::inner_join(sec_entity, by = c("fsym_company_id" = "fsym_id")) %>%
+    dplyr::filter(!(is.na(.data$ff_mkt_val) & is.na(.data$ff_debt))) %>%
+    dplyr::group_by(.data$fsym_id, .data$currency) %>%
+    dplyr::filter(.data$date <= .env$data_timestamp) %>%
+    dplyr::filter(
+      lubridate::year(.data$date) == lubridate::year(.env$data_timestamp)
+    ) %>%
+    dplyr::filter(.data$date == max(.data$date)) %>%
+    dplyr::ungroup()
+
+  logger::log_trace("Downloading entity financing data.")
+  entity_financing_data <- entity_financing_data %>%
+    dplyr::collect() %>%
+    dplyr::mutate(
+      # convert units from millions to units
+      ff_mkt_val = .data$ff_mkt_val * 1e6,
+      ff_debt = .data$ff_debt * 1e6
+    ) %>%
+    dplyr::distinct()
+
+  # return the entity financing data -------------------------------------------
+
+  entity_financing_data
+}
diff --git a/R/get_entity_info.R b/R/get_entity_info.R
new file mode 100644
index 0000000..a5b3cd3
--- /dev/null
+++ b/R/get_entity_info.R
@@ -0,0 +1,147 @@
+#' Get the entity info data from the FactSet database and prepare the
+#' `factset_entity_info` tibble
+#'
+#' @param conn database connection
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_entity_info.rds` output file
+#'
+#' @export
+
+get_entity_info <-
+  function(conn) {
+    # build connection to database ---------------------------------------------
+
+    logger::log_debug("Extracting entity info from database.")
+
+    # company_name -------------------------------------------------------------
+
+    logger::log_trace("Accessing entity proper names.")
+    entity_proper_name <-
"sym_v1_sym_entity") %>% + dplyr::select("factset_entity_id", "entity_proper_name") + + + # country_of_domicile ------------------------------------------------------ + + logger::log_trace("Accessing entity country of domicile.") + iso_country <- + dplyr::tbl(conn, "sym_v1_sym_entity") %>% + dplyr::select("factset_entity_id", "iso_country") + + + # sector ------------------------------------------------------------------- + + logger::log_trace("Accessing entity sector.") + sector_code <- + dplyr::tbl(conn, "sym_v1_sym_entity_sector") %>% + dplyr::select("factset_entity_id", "sector_code") + + logger::log_trace("Accessing sector descriptions.") + sector_code__sector_desc <- + dplyr::tbl(conn, "ref_v2_factset_sector_map") %>% + dplyr::select(.data$factset_sector_code, .data$factset_sector_desc) + + logger::log_trace("Merging sector codes and sector descriptions.") + factset_sector_desc <- + sector_code %>% + dplyr::left_join( + sector_code__sector_desc, + by = c("sector_code" = "factset_sector_code") + ) %>% + dplyr::select("factset_entity_id", "sector_code", "factset_sector_desc") + + + # sub-sector/industry ------------------------------------------------------ + + logger::log_trace("Accessing entity industry codes.") + industry_code <- + dplyr::tbl(conn, "sym_v1_sym_entity_sector") %>% + dplyr::select("factset_entity_id", "industry_code") + + logger::log_trace("Accessing industry descriptions") + industry_code__industry_desc <- + dplyr::tbl(conn, "ref_v2_factset_industry_map") %>% + dplyr::select("factset_industry_code", "factset_industry_desc") + + logger::log_trace("Merging industry codes and industry descriptions.") + factset_industry_desc <- + industry_code %>% + dplyr::left_join( + industry_code__industry_desc, + by = c("industry_code" = "factset_industry_code") + ) %>% + dplyr::select( + "factset_entity_id", + "industry_code", + "factset_industry_desc" + ) + + + # credit risk parent ------------------------------------------------------- + + logger::log_trace("Accessing entity affiliates.") + ent_v1_ent_entity_affiliates <- dplyr::tbl( + conn, + "ent_v1_ent_entity_affiliates" + ) + + logger::log_trace("Accessing affiliate type map.") + ref_v2_affiliate_type_map <- dplyr::tbl( + conn, + "ref_v2_affiliate_type_map" + ) + + logger::log_trace("Determining last update time for entity affiliates.") + affiliates_last_update <- + dplyr::tbl(conn, "fds_fds_file_history") %>% + dplyr::filter(.data$table_name == "ent_entity_affiliates") %>% + dplyr::filter( + .data$begin_time == max(.data$begin_time, na.rm = TRUE) + ) %>% + # pull also handles `collect`ing the data + dplyr::pull("begin_time") + + logger::log_trace("Determining credit risk parent via entity affiliates.") + credit_parent_id <- + ent_v1_ent_entity_affiliates %>% + dplyr::left_join(ref_v2_affiliate_type_map, by = "aff_type_code") %>% + dplyr::filter(.data$aff_type_desc == "Credit Risk Parent") %>% + dplyr::select( + factset_entity_id = "factset_affiliated_entity_id", + credit_parent_id = "factset_entity_id" + ) %>% + dplyr::mutate( + ent_entity_affiliates_last_update = affiliates_last_update + ) + + + # merge and collect -------------------------------------------------------- + + logger::log_trace("Merging entity info.") + entity_info <- + entity_proper_name %>% + dplyr::left_join( + iso_country, + by = "factset_entity_id" + ) %>% + dplyr::left_join( + factset_sector_desc, + by = "factset_entity_id" + ) %>% + dplyr::left_join( + factset_industry_desc, + by = "factset_entity_id" + ) %>% + dplyr::left_join( + credit_parent_id, + by 
= "factset_entity_id" + ) + + logger::log_trace("Downloading merged entity info from database.") + entity_info <- dplyr::collect(entity_info) + logger::log_trace("Download complete.") + + # return prepared data ----------------------------------------------------- + return(entity_info) + } diff --git a/R/get_financial_data.R b/R/get_financial_data.R new file mode 100644 index 0000000..c6b3555 --- /dev/null +++ b/R/get_financial_data.R @@ -0,0 +1,92 @@ +#' Get the factset financial data from the FactSet database and prepare the +#' `factset_financial_data` tibble +#' +#' @param conn databse connection +#' @param data_timestamp A single string specifying the desired date for the +#' data in the form "2021-12-31" +#' +#' @return A tibble properly prepared to be saved as the +#' `factset_financial_data.rds` output file +#' +#' @export + +get_financial_data <- + function(conn, data_timestamp, ...) { + # build connection to database --------------------------------------------- + + logger::log_debug("Extracting financial info from database.") + logger::log_info("using data timestamp: ", data_timestamp) + + + # factset_entity_id ----------------------------------------------- + + logger::log_trace("Accessing entity id.") + factset_entity_id <- + dplyr::tbl(conn, "own_v5_own_sec_entity") %>% + dplyr::select("fsym_id", "factset_entity_id") + + + # isin --------------------------------------------------------------------- + + logger::log_trace("Accessing ISINs.") + isin <- dplyr::tbl(conn, "sym_v1_sym_isin") + + + # adj_price ---------------------------------------------------------------- + + logger::log_trace( + "Accessing share prices. ", + "Filtering to date: {data_timestamp}" + ) + adj_price <- + dplyr::tbl(conn, "own_v5_own_sec_prices") %>% + dplyr::filter(.data$price_date == .env$data_timestamp) %>% + dplyr::select("fsym_id", "adj_price") + + + # adj_shares_outstanding --------------------------------------------------- + + logger::log_trace( + "Accessing shares outstanding. 
", + "Filtering to date: {data_timestamp}" + ) + adj_shares_outstanding <- + dplyr::tbl(conn, "own_v5_own_sec_prices") %>% + dplyr::filter(.data$price_date == .env$data_timestamp) %>% + dplyr::select("fsym_id", "adj_shares_outstanding") + + + # issue_type --------------------------------------------------------------- + + logger::log_trace("Accessing issue type.") + issue_type <- + dplyr::tbl(conn, "own_v5_own_sec_coverage") %>% + dplyr::select("fsym_id", "issue_type") + + + # one_adr_eq --------------------------------------------------------------- + + logger::log_trace("Accessing ADR equivilents.") + one_adr_eq <- + dplyr::tbl(conn, "own_v5_own_sec_adr_ord_ratio") %>% + dplyr::select("fsym_id" = "adr_fsym_id", "one_adr_eq") + + + # merge and collect -------------------------------------------------------- + + logger::log_trace("Merging financial info.") + fin_data <- + isin %>% + dplyr::left_join(factset_entity_id, by = "fsym_id") %>% + dplyr::left_join(adj_price, by = "fsym_id") %>% + dplyr::left_join(adj_shares_outstanding, by = "fsym_id") %>% + dplyr::left_join(issue_type, by = "fsym_id") %>% + dplyr::left_join(one_adr_eq, by = "fsym_id") + + logger::log_trace("Downloading merged financial info from database.") + fin_data <- dplyr::collect(fin_data) + logger::log_trace("Download complete.") + + # return prepared data ----------------------------------------------------- + return(fin_data) + } diff --git a/R/get_fund_data.R b/R/get_fund_data.R new file mode 100644 index 0000000..9a555ab --- /dev/null +++ b/R/get_fund_data.R @@ -0,0 +1,98 @@ +#' Get the fund data from the FactSet database and prepare the +#' `factset_fund_data` tibble +#' +#' @param conn databse connection +#' @param data_timestamp A single string specifying the desired date for the +#' data in the form "2021-12-31" +#' +#' @return A tibble properly prepared to be saved as the `factset_fund_data.rds` +#' output file +#' +#' @export + +get_fund_data <- function(conn, data_timestamp) { + # get the fund holdings and the holdings' reported market value ------------ + + logger::log_debug("Extracting financial info from database.") + logger::log_info("using data timestamp: ", data_timestamp) + + logger::log_trace( + "Accessing historical fund holdings - security level. ", + "Filtering to date: {data_timestamp}" + ) + fund_security <- + dplyr::tbl(conn, "own_v5_own_fund_detail") %>% + dplyr::filter(.data$report_date == .env$data_timestamp) %>% + dplyr::select( + factset_fund_id = "factset_fund_id", + holding_fsym_id = "fsym_id", + holding_reported_mv = "reported_mv" + ) + + logger::log_trace( + "Accessing historical fund holdings - non-securities. ", + "Filtering to date: {data_timestamp}" + ) + fund_nonsecurity <- + dplyr::tbl(conn, "own_v5_own_fund_generic") %>% + dplyr::filter(.data$report_date == .env$data_timestamp) %>% + dplyr::select( + factset_fund_id = "factset_fund_id", + holding_fsym_id = "generic_id", + holding_reported_mv = "reported_mv" + ) + + logger::log_trace( + "Combining historical fund holdings - security and non-security." 
+  fund_holding <-
+    dplyr::union_all(
+      fund_security,
+      fund_nonsecurity
+    )
+
+
+  # get the fund total reported market value ---------------------------------
+
+  logger::log_trace(
+    "Accessing historical fund filings. ",
+    "Filtering to date: {data_timestamp}"
+  )
+  fund_mv <-
+    dplyr::tbl(conn, "own_v5_own_ent_fund_filing_hist") %>%
+    dplyr::filter(.data$report_date == .env$data_timestamp) %>%
+    dplyr::select("factset_fund_id", "total_reported_mv")
+
+
+  logger::log_trace("Accessing current ISIN mappings.")
+  # symbology containing the ISIN to fsym_id link
+  fsym_id__isin <-
+    dplyr::tbl(conn, "sym_v1_sym_isin")
+
+
+  # merge and collect the data ------------------------------------------------
+
+  logger::log_trace("Merging the data.")
+  fund_data <-
+    fund_mv %>%
+    dplyr::filter(
+      .data$total_reported_mv != 0 & !is.na(.data$total_reported_mv)
+    ) %>%
+    dplyr::left_join(fund_holding, by = "factset_fund_id") %>%
+    dplyr::left_join(fsym_id__isin, by = c(`holding_fsym_id` = "fsym_id")) %>%
+    dplyr::select(
+      factset_fund_id = "factset_fund_id",
+      fund_reported_mv = "total_reported_mv",
+      holding_isin = "isin",
+      holding_reported_mv = "holding_reported_mv"
+    )
+
+  logger::log_trace("Downloading fund data.")
+  fund_data <- dplyr::collect(fund_data)
+
+  # return the fund data ------------------------------------------------------
+
+  return(fund_data)
+}
diff --git a/R/get_isin_to_fund_table.R b/R/get_isin_to_fund_table.R
new file mode 100644
index 0000000..bf76142
--- /dev/null
+++ b/R/get_isin_to_fund_table.R
@@ -0,0 +1,40 @@
+#' Get the isin_to_fund_table data from the FactSet database and prepare the
+#' `factset_isin_to_fund_table` tibble
+#'
+#' @param conn database connection
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_isin_to_fund_table.rds` output file
+#'
+#' @export
+
+get_isin_to_fund_table <- function(conn) {
+  # get the ISIN to fsym_id table --------------------------------------------
+
+  logger::log_info("Getting ISIN to fsym_id mapping")
+  isin <-
+    dplyr::tbl(conn, "sym_v1_sym_isin") %>%
+    dplyr::select("isin", "fsym_id")
+
+
+  # get the fsym_id to fund_id table -----------------------------------------
+
+  logger::log_info("Getting fsym_id to fund id mapping")
+  fund_id <-
+    dplyr::tbl(conn, "own_v5_own_ent_fund_identifiers") %>%
+    dplyr::filter(.data$identifier_type == "FSYM_ID") %>%
+    dplyr::select(fsym_id = "fund_identifier", "factset_fund_id")
+
+
+  # merge and collect the data ------------------------------
+
+  logger::log_info("Merging ISIN to fsym_id and fsym_id to fund_id")
+  isin__factset_fund_id <-
+    fund_id %>%
+    dplyr::inner_join(isin, by = "fsym_id") %>%
+    dplyr::select("isin", "fsym_id", "factset_fund_id") %>%
+    dplyr::collect()
+
+  # return the ISIN to fund_id table -----------------------------------------
+  return(isin__factset_fund_id)
+}
diff --git a/R/get_iss_emissions_data.R b/R/get_iss_emissions_data.R
new file mode 100644
index 0000000..17dd790
--- /dev/null
+++ b/R/get_iss_emissions_data.R
@@ -0,0 +1,108 @@
+#' Get the ISS emissions data from the FactSet database and prepare the
+#' `factset_iss_emissions` tibble
+#'
+#' @param conn database connection
+#' @param reporting_year A single numeric specifying the year of data to be
+#' returned
+#' @param min_estimated_trust A single numeric specifying the minimum allowed
+#' "estimated trust" value
+#' @param min_reported_trust A single numeric specifying the minimum allowed
+#' "reported trust" value
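+#'
+#' @examples
+#' \dontrun{
+#' # illustrative only: assumes the PG* environment variables point at a
+#' # FactSet-sourced Postgres database (see `connect_factset_db()`)
+#' conn <- connect_factset_db()
+#' emissions <- get_iss_emissions_data(conn, reporting_year = 2022)
+#' DBI::dbDisconnect(conn)
+#' }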
+#'
+#' @return A tibble properly prepared to be saved as the
+#' `factset_iss_emissions.rds` output file
+#'
+#' @export
+
+get_iss_emissions_data <- function(
+  conn,
+  reporting_year,
+  min_estimated_trust = 0.0,
+  min_reported_trust = 0.0
+) {
+  # convert `reporting_year` to date ------------------------------------------
+  sql_filter_date <- as.Date(paste0(reporting_year, "-01-01"), "%Y-%m-%d")
+
+  # get the relevant fsym_id to factset_entity_id table ----------------------
+  fsym_id__factset_entity_id <-
+    dplyr::tbl(conn, "icc_v2_icc_sec_entity_hist") %>%
+    # end_date identifies the date the identifier was last associated with
+    # fsym_id, i.e. if there is no end_date (end_date == NA) then the
+    # association is still valid
+    dplyr::filter(
+      .data$end_date >= sql_filter_date | is.na(.data$end_date)
+    ) %>%
+    dplyr::filter(!is.na(.data$fsym_id)) %>%
+    dplyr::filter(!is.na(.data$factset_entity_id)) %>%
+    dplyr::select("fsym_id", "factset_entity_id") %>%
+    dplyr::distinct()
+
+
+  # get the relevant icc_security_id to factset_entity_id table --------------
+
+  icc_security_id <-
+    dplyr::tbl(conn, "icc_v2_icc_factset_id_map") %>%
+    dplyr::filter(.data$provider_id_type == "icc_security_id") %>%
+    dplyr::filter(.data$factset_id_type == "fsym_security_id") %>%
+    dplyr::filter(!is.na(.data$factset_id)) %>%
+    # do not use a fsym_id that was started in the current year to avoid data
+    # based on a partial year
+    dplyr::filter(.data$id_start_date < sql_filter_date) %>%
+    # end_date identifies the date the identifier was last associated with
+    # fsym_id, i.e. if there is no end_date (end_date == NA) then the
+    # association is still valid
+    dplyr::filter(
+      .data$id_end_date >= sql_filter_date | is.na(.data$id_end_date)
+    ) %>%
+    dplyr::select(icc_security_id = "provider_id", fsym_id = "factset_id") %>%
+    dplyr::inner_join(fsym_id__factset_entity_id, by = "fsym_id") %>%
+    dplyr::select("icc_security_id", "factset_entity_id") %>%
+    dplyr::distinct()
+
+
+  # get the factset_entity_id to icc_total_emissions data --------------------
+
+  icc_total_emissions <-
+    dplyr::tbl(conn, "icc_v2_icc_carbon_climate_core") %>%
+    dplyr::filter(.data$icc_emissions_fiscal_year == .env$reporting_year) %>%
+    dplyr::group_by(.data$icc_security_id, .data$icc_emissions_fiscal_year) %>%
+    # icc_archive_date marks the date a data point was submitted, and
+    # sometimes there are updates of previous data submissions, so we need to
+    # dplyr::filter only for the most recent submission
+    dplyr::filter(
+      .data$icc_archive_date == max(.data$icc_archive_date, na.rm = TRUE)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::group_by(.data$icc_company_id, .data$icc_emissions_fiscal_year) %>%
+    dplyr::filter(
+      .data$icc_archive_date == max(.data$icc_archive_date, na.rm = TRUE)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::filter(
+      .data$icc_emissions_estimated_trust > min_estimated_trust |
+        .data$icc_emissions_reported_trust > min_reported_trust
+    ) %>%
+    dplyr::select(
+      "icc_security_id",
+      "icc_total_emissions",
+      "icc_scope_3_emissions"
+    ) %>%
+    dplyr::inner_join(icc_security_id, by = "icc_security_id") %>%
+    dplyr::select(
+      "factset_entity_id",
+      "icc_total_emissions",
+      "icc_scope_3_emissions"
+    )
+
+  # collect the data ----------------------------------------------------------
+
+  logger::log_trace("Downloading emissions data.")
+  icc_total_emissions <-
+    icc_total_emissions %>%
+    dplyr::collect()
+
+  # return the factset_entity_id to icc_total_emissions data -----------------
+
+  return(icc_total_emissions)
+}
diff --git a/R/workflow.factset-package.R b/R/workflow.factset-package.R
new file mode 100644
index 0000000..4293484
--- /dev/null
+++ b/R/workflow.factset-package.R
@@ -0,0 +1,9 @@
+#' @keywords internal
+"_PACKAGE"
+
+## usethis namespace: start
+#' @importFrom dplyr %>%
+#' @importFrom rlang .data
+#' @importFrom rlang .env
+## usethis namespace: end
+NULL
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1a81e56
--- /dev/null
+++ b/README.md
@@ -0,0 +1,17 @@
+# workflow.factset
+
+## Running container
+
+```sh
+docker run -i -t --rm --env-file=.env -v ./foo:/mnt/factset-data IMAGE_NAME
+```
+
+## Deploying to Azure
+
+```sh
+# change this value as needed.
+RESOURCEGROUP="myResourceGroup"
+
+# run from repo root
+
+az deployment group create --resource-group "$RESOURCEGROUP" --template-file azure-deploy.json --parameters @azure-deploy.parameters.json
+
+```
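+
+## Running locally
+
+The export can also be run from an R session, without Docker. A minimal
+sketch, assuming the `PG*` connection variables and `EXPORT_DESTINATION`
+are set in the environment (see `example.env`):
+
+```r
+# illustrative values; see example.env for the full set of variables
+workflow.factset::export_pacta_files(
+  destination = Sys.getenv("EXPORT_DESTINATION"),
+  data_timestamp = "2023-01-23"
+)
+```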
diff --git a/azure-deploy.json b/azure-deploy.json
new file mode 100644
index 0000000..6a1a685
--- /dev/null
+++ b/azure-deploy.json
@@ -0,0 +1,160 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
+  "contentVersion": "0.0.0.5",
+
+  "parameters": {
+    "location": {
+      "type": "string",
+      "defaultValue": "[resourceGroup().location]",
+      "metadata": {
+        "description": "Location for all resources."
+      }
+    },
+    "identity": {
+      "type": "string",
+      "metadata": {
+        "description": "The ID of the user assigned identity to use for the container group."
+      }
+    },
+    "containerGroupName": {
+      "type": "string",
+      "metadata": {
+        "description": "The name of the container group."
+      }
+    },
+    "restartPolicy": {
+      "type": "string",
+      "defaultValue": "OnFailure",
+      "allowedValues": [
+        "Always",
+        "Never",
+        "OnFailure"
+      ],
+      "metadata": {
+        "description": "The behavior of Azure runtime if container has stopped."
+      }
+    },
+    "rawdata-storageaccountkey": {
+      "type": "securestring",
+      "metadata": {
+        "description": "The storage account key for the rawdata storage account."
+      }
+    },
+    "database-password": {
+      "type": "securestring",
+      "metadata": {
+        "description": "password to connect to database"
+      }
+    },
+    "starttime": {
+      "type": "string",
+      "defaultValue": "[utcNow()]",
+      "metadata": {
+        "description": "The time to start the container group."
+      }
+    }
+  },
+
+  "variables": {
+    "PGDATABASE": "FDS",
+    "PGHOST": "[concat('factset-01-postgres', '.postgres.database.azure.com')]",
+    "PGUSER": "postgres",
+    "containerregistry": "ghcr.io/rmi-pacta",
+    "machineCpuCores": 1,
+    "machineMemoryInGB": 16,
+    "mountPathExport": "/mnt/factset-extracted"
+  },
+
+  "functions": [],
+
+  "resources": [
+    {
+      "type": "Microsoft.ContainerInstance/containerGroups",
+      "apiVersion": "2021-09-01",
+      "name": "[parameters('containerGroupName')]",
+      "location": "[parameters('location')]",
+      "identity": {
+        "type": "UserAssigned",
+        "userAssignedIdentities": {
+          "[parameters('identity')]": {}
+        }
+      },
+      "properties": {
+        "containers": [
+          {
+            "name": "loader-runner",
+            "properties": {
+              "image": "[concat(variables('containerregistry'),'/workflow.factset:pr1')]",
+              "ports": [],
+              "resources": {
+                "requests": {
+                  "cpu": "[variables('machineCpuCores')]",
+                  "memoryInGB": "[variables('machineMemoryInGB')]"
+                }
+              },
+              "environmentVariables": [
+                {
+                  "name": "PGUSER",
+                  "value": "[variables('PGUSER')]"
+                },
+                {
+                  "name": "PGPASSWORD",
+                  "secureValue": "[parameters('database-password')]"
+                },
+                {
+                  "name": "PGHOST",
+                  "value": "[variables('PGHOST')]"
+                },
+                {
+                  "name": "PGDATABASE",
+                  "value": "[variables('PGDATABASE')]"
+                },
+                {
+                  "name": "DEPLOY_START_TIME",
+                  "value": "[parameters('starttime')]"
+                },
+                {
+                  "name": "MACHINE_CORES",
+                  "value": "[string(variables('machineCpuCores'))]"
+                },
+                {
+                  "name": "LOG_LEVEL",
+                  "value": "TRACE"
+                },
+                {
+                  "name": "EXPORT_DESTINATION",
+                  "value": "[variables('mountPathExport')]"
+                },
+                {
+                  "name": "DATA_TIMESTAMP",
+                  "value": "20230123"
+                }
+              ],
+              "volumeMounts": [
+                {
+                  "name": "factset-extracted",
+                  "mountPath": "[variables('mountPathExport')]"
+                }
+              ]
+            }
+          }
+        ],
+        "restartPolicy": "[parameters('restartPolicy')]",
+        "osType": "Linux",
+        "volumes": [
+          {
+            "name": "factset-extracted",
+            "azureFile": {
+              "shareName": "factset-extracted",
+              "readOnly": false,
+              "storageAccountName": "pactarawdata",
+              "storageAccountKey": "[parameters('rawdata-storageaccountkey')]"
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "outputs": {}
+}
diff --git a/example.env b/example.env
new file mode 100644
index 0000000..e615517
--- /dev/null
+++ b/example.env
@@ -0,0 +1,7 @@
+DEPLOY_START_TIME=20000101T000001
+EXPORT_DESTINATION=/mnt/factset-data
+LOG_LEVEL=TRACE
+PGDATABASE=FDS
+PGHOST=postgres.example.com
+PGPASSWORD=SuperSecretPassw0rd
+PGUSER=postgres
diff --git a/man/connect_factset_db.Rd b/man/connect_factset_db.Rd
new file mode 100644
index 0000000..ee82e71
--- /dev/null
+++ b/man/connect_factset_db.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/connect_factset_db.R
+\name{connect_factset_db}
+\alias{connect_factset_db}
+\title{Connect to the FactSet database}
+\usage{
+connect_factset_db(
+  dbname = Sys.getenv("PGDATABASE"),
+  host = Sys.getenv("PGHOST"),
+  port = Sys.getenv("PGPORT", 5432L),
+  options = "-c search_path=fds",
+  username = Sys.getenv("PGUSER"),
+  password = Sys.getenv("PGPASSWORD")
+)
+}
+\arguments{
+\item{dbname}{name of the database to connect to}
+
+\item{host}{hostname of the server to connect to}
+
+\item{port}{port number of the server to connect to}
+
+\item{options}{additional options to pass to the database connection.
+Typically used to define schema search path.}
+
+\item{username}{username to use for the database connection}
+
+\item{password}{password to use for the database connection}
+}
+\value{
+a database connection object
+}
+\description{
+Connect to the FactSet database
+}
diff --git a/man/export_pacta_files.Rd b/man/export_pacta_files.Rd
new file mode 100644
index 0000000..a11143e
--- /dev/null
+++ b/man/export_pacta_files.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/export_pacta_files.R
+\name{export_pacta_files}
+\alias{export_pacta_files}
+\title{Export files for use in PACTA data preparation}
+\usage{
+export_pacta_files(
+  conn = connect_factset_db(),
+  destination = file.path(Sys.getenv("EXPORT_DESTINATION")),
+  data_timestamp = Sys.getenv("DATA_TIMESTAMP", Sys.time()),
+  terminate_connection = (deparse(substitute(conn)) ==
+    formals(export_pacta_files)[["conn"]])
+)
+}
+\arguments{
+\item{conn}{database connection object, such as returned by
+\code{\link[=connect_factset_db]{connect_factset_db()}}}

+\item{destination}{path to directory where exported files will be saved}
+
+\item{data_timestamp}{filter data as-of this timestamp}
+
+\item{terminate_connection}{if \code{TRUE}, disconnect \code{conn} before
+returning. Defaults to \code{TRUE} when the connection was created by this
+function.}
+}
+\value{
+vector of paths to exported files
+}
+\description{
+Export files for use in PACTA data preparation
+}
diff --git a/man/get_entity_financing_data.Rd b/man/get_entity_financing_data.Rd
new file mode 100644
index 0000000..1cfb624
--- /dev/null
+++ b/man/get_entity_financing_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_entity_financing_data.R
+\name{get_entity_financing_data}
+\alias{get_entity_financing_data}
+\title{Get the entity financing data from the FactSet database and prepare the
+\code{factset_entity_financing_data} tibble}
+\usage{
+get_entity_financing_data(conn, data_timestamp)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{data_timestamp}{A single string specifying the desired date for the
+data in the form "2021-12-31"}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_entity_financing_data.rds} output file
+}
+\description{
+Get the entity financing data from the FactSet database and prepare the
+\code{factset_entity_financing_data} tibble
+}
diff --git a/man/get_entity_info.Rd b/man/get_entity_info.Rd
new file mode 100644
index 0000000..564b5d4
--- /dev/null
+++ b/man/get_entity_info.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_entity_info.R
+\name{get_entity_info}
+\alias{get_entity_info}
+\title{Get the entity info data from the FactSet database and prepare the
+\code{factset_entity_info} tibble}
+\usage{
+get_entity_info(conn)
+}
+\arguments{
+\item{conn}{database connection}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_entity_info.rds} output file
+}
+\description{
+Get the entity info data from the FactSet database and prepare the
+\code{factset_entity_info} tibble
+}
diff --git a/man/get_financial_data.Rd b/man/get_financial_data.Rd
new file mode 100644
index 0000000..efc14ae
--- /dev/null
+++ b/man/get_financial_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_financial_data.R
+\name{get_financial_data}
+\alias{get_financial_data}
+\title{Get the factset financial data from the FactSet database and prepare the
+\code{factset_financial_data} tibble}
+\usage{
+get_financial_data(conn, data_timestamp)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{data_timestamp}{A single string specifying the desired date for the
+data in the form "2021-12-31"}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_financial_data.rds} output file
+}
+\description{
+Get the factset financial data from the FactSet database and prepare the
+\code{factset_financial_data} tibble
+}
diff --git a/man/get_fund_data.Rd b/man/get_fund_data.Rd
new file mode 100644
index 0000000..c6113a1
--- /dev/null
+++ b/man/get_fund_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_fund_data.R
+\name{get_fund_data}
+\alias{get_fund_data}
+\title{Get the fund data from the FactSet database and prepare the
+\code{factset_fund_data} tibble}
+\usage{
+get_fund_data(conn, data_timestamp)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{data_timestamp}{A single string specifying the desired date for the
+data in the form "2021-12-31"}
+}
+\value{
+A tibble properly prepared to be saved as the \code{factset_fund_data.rds}
+output file
+}
+\description{
+Get the fund data from the FactSet database and prepare the
+\code{factset_fund_data} tibble
+}
diff --git a/man/get_isin_to_fund_table.Rd b/man/get_isin_to_fund_table.Rd
new file mode 100644
index 0000000..2112e16
--- /dev/null
+++ b/man/get_isin_to_fund_table.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_isin_to_fund_table.R
+\name{get_isin_to_fund_table}
+\alias{get_isin_to_fund_table}
+\title{Get the isin_to_fund_table data from the FactSet database and prepare the
+\code{factset_isin_to_fund_table} tibble}
+\usage{
+get_isin_to_fund_table(conn)
+}
+\arguments{
+\item{conn}{database connection}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_isin_to_fund_table.rds} output file
+}
+\description{
+Get the isin_to_fund_table data from the FactSet database and prepare the
+\code{factset_isin_to_fund_table} tibble
+}
diff --git a/man/get_iss_emissions_data.Rd b/man/get_iss_emissions_data.Rd
new file mode 100644
index 0000000..ab78fd4
--- /dev/null
+++ b/man/get_iss_emissions_data.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_iss_emissions_data.R
+\name{get_iss_emissions_data}
+\alias{get_iss_emissions_data}
+\title{Get the ISS emissions data from the FactSet database and prepare the
+\code{factset_iss_emissions} tibble}
+\usage{
+get_iss_emissions_data(
+  conn,
+  reporting_year,
+  min_estimated_trust = 0,
+  min_reported_trust = 0
+)
+}
+\arguments{
+\item{conn}{database connection}
+
+\item{reporting_year}{A single numeric specifying the year of data to be
+returned}
+
+\item{min_estimated_trust}{A single numeric specifying the minimum allowed
+"estimated trust" value}
+
+\item{min_reported_trust}{A single numeric specifying the minimum allowed
+"reported trust" value}
+}
+\value{
+A tibble properly prepared to be saved as the
+\code{factset_iss_emissions.rds} output file
+}
+\description{
+Get the ISS emissions data from the FactSet database and prepare the
+\code{factset_iss_emissions} tibble
+}
diff --git a/man/workflow.factset-package.Rd b/man/workflow.factset-package.Rd
new file mode 100644
index 0000000..ba4560a
--- /dev/null
+++ b/man/workflow.factset-package.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/workflow.factset-package.R
+\docType{package}
+\name{workflow.factset-package}
+\alias{workflow.factset}
+\alias{workflow.factset-package}
+\title{workflow.factset: Extract Financial Data for use in PACTA}
+\description{
+Extract data from a FactSet Postgres database for use as part of PACTA Data Preparation
+}
+\author{
+\strong{Maintainer}: CJ Yetman \email{cj@cjyetman.com} (\href{https://orcid.org/0000-0001-5099-9500}{ORCID}) [contractor]
+
+Authors:
+\itemize{
+  \item Jackson Hoffart \email{jackson.hoffart@gmail.com} (\href{https://orcid.org/0000-0002-8600-5042}{ORCID}) [contractor]
+  \item Jacob Kastl \email{jacob.kastl@gmail.com} [contractor]
+  \item Alex Axthelm \email{aaxthelm@rmi.org} (\href{https://orcid.org/0000-0001-8579-8565}{ORCID}) [contractor]
+}
+
+Other contributors:
+\itemize{
+  \item RMI \email{PACTA4investors@rmi.org} [copyright holder, funder]
+}
+
+}
+\keyword{internal}