Skip to content

Commit

Permalink
TER-405 add doc and help for tf-module harvest (#163)
Browse files Browse the repository at this point in the history
document these commands, and update command help text:
- harvest resources
- harvest modules
- harvest mappings

Also, update table of contents
  • Loading branch information
Kanak Singhal authored Dec 12, 2023
1 parent 5622be1 commit 5dc63c5
Show file tree
Hide file tree
Showing 12 changed files with 341 additions and 51 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ farm-release-pull:
fi; \
curl -L -o data/$(FARM_DB_DUMP_FILE) "$$DB_DUMP_URL"

.PHONY: toc
toc:
./scripts/doc_tree.sh

.PHONY: help
help:
@grep -hE '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
Expand Down
16 changes: 14 additions & 2 deletions TOC.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
# Table of contents

* [CONTRIBUTING.md](./CONTRIBUTING.md)
* [SUPPORT.md](./SUPPORT.md)
* [TOC.md](./TOC.md)
* [readme.md](./readme.md)
* [setup.md](./setup.md)
* [.github/workflows/README.md](./.github/workflows/README.md)
* [examples/cldcvr-modules-platform/FAQs.md](./examples/cldcvr-modules-platform/FAQs.md)
* [examples/cldcvr-modules-platform/platform.md](./examples/cldcvr-modules-platform/platform.md)
* [examples/cldcvr-modules-platform/readme.md](./examples/cldcvr-modules-platform/readme.md)
* [examples/cldcvr-modules-platform/terraform.md](./examples/cldcvr-modules-platform/terraform.md)
* [examples/cldcvr-modules-platform/tf-without-platform/resources.md](./examples/cldcvr-modules-platform/tf-without-platform/resources.md)
* [examples/farm/readme.md](./examples/farm/readme.md)
* [examples/lz/readme.md](./examples/lz/readme.md)
* [examples/lz/platforms/readme.md](./examples/lz/platforms/readme.md)
* [examples/lz/requirements/readme.md](./examples/lz/requirements/readme.md)
* [examples/platform-demo/readme.md](./examples/platform-demo/readme.md)
* [examples/platform/readme.md](./examples/platform/readme.md)
* [getting_started/developer_perspective.md](./getting_started/developer_perspective.md)
* [getting_started/devops_perspective.md](./getting_started/devops_perspective.md)
* [readme/public_doc/CLI](./REFERENCE/cli.md)
* [readme/public_doc/CLI](./REFERENCE/cli.md)
* [readme/public_doc/FARM/terrarium-farm.md](./readme/public_doc/FARM/terrarium-farm.md)
Expand All @@ -20,11 +30,13 @@
* [readme/public_doc/PLATFORM/components.md](./readme/public_doc/PLATFORM/components.md)
* [readme/public_doc/PLATFORM/dependency-interface.md](./readme/public_doc/PLATFORM/dependency-interface.md)
* [readme/public_doc/PLATFORM/platform.md](./readme/public_doc/PLATFORM/platform.md)
* [src/cli/cmd/harvest/mappings/readme.md](./src/cli/cmd/harvest/mappings/readme.md)
* [src/cli/cmd/harvest/modules/readme.md](./src/cli/cmd/harvest/modules/readme.md)
* [src/cli/cmd/harvest/resources/readme.md](./src/cli/cmd/harvest/resources/readme.md)
* [src/pkg/confighelper/readme.md](./src/pkg/confighelper/readme.md)
* [src/pkg/db/readme.md](./src/pkg/db/readme.md)
* [src/pkg/metadata/app/readme.md](./src/pkg/metadata/app/readme.md)
* [src/pkg/metadata/cli/readme.md](./src/pkg/metadata/cli/readme.md)
* [src/pkg/metadata/dependency/readme.md](./src/pkg/metadata/dependency/readme.md)
* [src/pkg/metadata/modulelist/readme.md](./src/pkg/metadata/modulelist/readme.md)
* [src/pkg/metadata/platform/readme.md](./src/pkg/metadata/platform/readme.md)
* [src/pkg/transporthelper/readme.md](./src/pkg/transporthelper/readme.md)
3 changes: 2 additions & 1 deletion scripts/doc_tree.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
# Copyright (c) Ollion
# SPDX-License-Identifier: Apache-2.0

echo "# Table of contents\n" > TOC.md
# Regenerate TOC.md: write the heading first, then append a blank separator line.
# The separator must be appended with '>>'; a second '>' would truncate the
# file and silently drop the heading written on the previous line.
echo "# Table of contents" > TOC.md
echo "" >> TOC.md
# rsync runs in dry-run mode (-n) purely to list the paths that survive the
# exclude files; only the entries ending in ".md" are kept for the listing.
rsync -avn . /dev/shm --exclude-from .gitignore --exclude-from .git/info/exclude | grep "\.md$" | while IFS= read -r line; do
	# Quote the path so names containing spaces or glob characters reach
	# printf as a single argument instead of being word-split.
	printf "* [%s](./%s)\n" "$line" "$line" >> TOC.md
done;
15 changes: 10 additions & 5 deletions src/cli/cmd/harvest/mappings/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"path/filepath"

"github.com/MakeNowJust/heredoc/v2"
"github.com/charmbracelet/log"
"github.com/cldcvr/terraform-config-inspect/tfconfig"
"github.com/cldcvr/terrarium/src/cli/internal/config"
Expand All @@ -29,13 +30,17 @@ func NewCmd() *cobra.Command {
cmd = &cobra.Command{
Use: "mappings",
Short: "Scrapes resource attribute mappings from the terraform directory",
Long: "The 'mappings' command scrapes resource attribute mappings from the specified terraform directory.",
RunE: cmdRunE,
Long: heredoc.Doc(
`The 'mappings' command scrapes resource attribute mappings from the specified terraform directory.
It parses Terraform code and its modules to find mappings between input and output resource attributes,
such as linking an input attribute of one resource to an output attribute of another.
`),
RunE: cmdRunE,
}

cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "terraform directory path")
cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "list file of modules to process")
cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "store all module sources in this directory; improves performance by reusing data between harvest commands")
cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "Path to the Terraform directory")
cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "Path to a file listing modules to process. In this mode, 'terraform init' and 'terraform providers schema -json' are executed automatically. More details at https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md")
cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "Directory for storing module sources. Using a workdir improves performance by reusing data between harvesting multiple modules. This flag should be used in conjunction with 'module-list-file'.")

return cmd
}
Expand Down
89 changes: 89 additions & 0 deletions src/cli/cmd/harvest/mappings/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Terraform Mappings Harvester

The Terraform Mappings Harvester is a specialized tool designed to parse Terraform code and its modules to identify and scrape resource attribute mappings.

## Working

```mermaid
flowchart TD
TC["Terraform HCL code"] --"terraform init"--> T[".terraform/modules"]
T --> MC["Inner Module code\n.terraform/modules/module_name/*.tf"]
MC --> MH["Mappings Harvester"]
MH --> DB["Database\ntf_resource_attribute_mappings"]
```

The Terraform Mappings Harvester operates through a series of steps to identify and extract resource attribute mappings from Terraform code. The process, as visualized in the above Mermaid diagram, involves the following key stages:

1. **Terraform HCL Code Processing**:
- The process begins with the Terraform HCL (HashiCorp Configuration Language) code.
- The user runs `terraform init` on their Terraform code. This command is crucial for initializing a Terraform working directory, which includes downloading and setting up the necessary modules.

2. **Extraction of Inner Module Code**:
- The initialization process creates a `.terraform/modules` directory.
- Within this directory, the Harvester focuses on the inner module code, which is located in `.terraform/modules/module_name/*.tf`. These files contain the actual Terraform code for each module used in the project.
- This step is critical as it involves parsing the Terraform files to identify the resource attribute mappings.

3. **Mappings Harvester Processing**:
- The Mappings Harvester then processes the extracted module code.
- It systematically scans through the `.tf` files, identifying and extracting mappings between input and output resource attributes. For example, it can detect a mapping like `aws_security_group.vpc_id = aws_vpc.id`, indicating that the `vpc_id` attribute of `aws_security_group` maps to the `id` attribute of `aws_vpc`, by analyzing a code block like this:

```tf
resource "aws_security_group" "dbsg" {
name = "db"
description = "security group for db"
vpc_id = aws_vpc.demo.id
}
```
4. **Storing in Database**:
- After processing, the Harvester stores the identified mappings in a database.
- The database contains a table like `tf_resource_attribute_mappings`, where the mappings between resource attributes are recorded.
- This structured storage allows for efficient querying and analysis of the resource attribute mappings.
## Usage
### Command Syntax
To use the Terraform Mappings Harvester, run the following command:
```sh
terrarium harvest mappings --dir <path-to-terraform-directory>
```

Replace `<path-to-terraform-directory>` with the actual path to your Terraform project.

#### Additional Flags

- **Module List File** (`--module-list-file`): Specify a file listing the modules to be processed. This is useful for analyzing multiple modules.

- **Working Directory** (`--workdir`): Define a directory for storing module sources. This improves performance by reusing data between commands, which is especially beneficial when processing multiple modules.

### Direct Scraping

To scrape mappings directly from a Terraform directory:

1. **Run the Harvester**:

```sh
terrarium harvest mappings --dir <path-to-terraform-directory>
```

This command will parse the Terraform code in the specified directory and identify resource attribute mappings.

### Using a Module List File

To scrape mappings using a module list file:

1. **Prepare a Module List File**: Create a file listing the modules you want to process. Refer to the [module list file documentation](https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md) for the format and details.

2. **Run the Harvester**:

```sh
terrarium harvest mappings --module-list-file <path-to-module-list-file>
```

This command will process only the modules specified in the module list file.

## Monitoring Execution

Monitor the harvester's execution through the console output. It will provide progress messages and any errors encountered during the scraping process.
21 changes: 14 additions & 7 deletions src/cli/cmd/harvest/modules/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,26 @@ func NewCmd() *cobra.Command {
cmd = &cobra.Command{
Use: "modules",
Aliases: []string{"mo"},
Short: "Scrapes Terraform modules and attributes from the terraform directory",
Short: "Harvests Terraform modules and attributes into the database",
Long: heredoc.Doc(`
The 'modules' command scrapes all Terraform modules and its attributes from the specified terraform directory.
The 'modules' command harvests all Terraform modules and their attributes into the database.
Prerequisite: Run "terraform init" in the directory before using this command.
This command can operate in two modes:
1. Direct scraping from a specified Terraform directory.
2. Processing a list of modules specified in a module list file.
For direct scraping, ensure to run "terraform init" in the Terraform directory before using this command.
When using a module list file, the command processes only the specified modules.
Additional flags allow including local modules and specifying a working directory for improved performance.
`),
RunE: cmdRunE,
}

cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "terraform directory path")
cmd.Flags().BoolVarP(&flagIncludeLocal, "enable-local-modules", "l", false, "A boolean flag to control include/exclude of local modules")
cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "list file of modules to process")
cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "store all module sources in this directory; improves performance by reusing data between harvest commands")
cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "Path to the Terraform directory")
cmd.Flags().BoolVarP(&flagIncludeLocal, "enable-local-modules", "l", false, "Include local modules in the scraping process")
cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "Path to a file listing modules to process. In this mode, 'terraform init' and 'terraform providers schema -json' are executed automatically. More details at https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md")
cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "Directory for storing module sources. Using a workdir improves performance by reusing data between harvesting multiple modules. This flag should be used in conjunction with 'module-list-file'.")

return cmd
}
Expand Down
90 changes: 90 additions & 0 deletions src/cli/cmd/harvest/modules/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Terraform Modules Harvester

The Terraform Modules Harvester is a tool designed to scrape Terraform modules and their attributes from a specified directory or a list of modules.

## Working

```mermaid
flowchart TD
TC["Terraform HCL code"] --"terraform init"--> T[".terraform/modules"]
T --> MI["Module names, sources, versions\n.terraform/modules/modules.json"]
T --> MA["Module attributes\n.terraform/modules/module_name/*.tf"]
MI --> MH["Module Harvester"]
MA --> MH
MH --> DB["Database\ntf_modules\ntf_module_attributes"]
```

The Terraform Modules Harvester operates through a series of steps to extract information about Terraform modules and their attributes. The process is visualized in the above Mermaid flowchart and can be described as follows:

1. **Terraform HCL Code Processing**:
- The process begins with the Terraform HCL (HashiCorp Configuration Language) code.
- The user runs `terraform init` on their Terraform code. This command initializes a Terraform working directory by downloading and installing the necessary providers and modules.

2. **Extraction of Module Information**:
- The initialization process creates a `.terraform/modules` directory.
- Within this directory, two key sources of information are generated:
- **Module Information File (`modules.json`)**: This file, located at `.terraform/modules/modules.json`, contains details about module names, sources, and versions. It acts as a central repository of metadata about the modules used in the Terraform code.
- **Module Attributes**: For each module, there are HCL files (`.tf` files) within the module's directory (e.g., `.terraform/modules/module_name/*.tf`). These files hold the specific attributes and configurations of each module.

3. **Module Harvester Processing**:
- The Module Harvester tool processes the information extracted from the `.terraform/modules` directory.
- It reads both the module information from `modules.json` and the module attributes from the individual `.tf` files.
- This step translates the raw data from the Terraform configuration and module files into structured information.

4. **Storing in Database**:
- Once the Module Harvester has processed the information, it stores the results in a database.
- The database consists of tables like `tf_modules` and `tf_module_attributes`, where the harvested data about modules and their attributes is organized and stored for further use or analysis.

## Usage

### Modes of Operation

The scraper can be operated in two distinct modes:

1. **Direct Scraping from Terraform Directory**: This mode directly scrapes modules from a specified Terraform directory.

2. **Using a Module List File**: This mode processes modules listed in a provided module list file.

### Prerequisites

- **Terraform**: Ensure Terraform is installed and accessible.

### Direct Scraping

To scrape modules directly from a Terraform directory:

1. **Initialize Terraform**: Ensure you have run `terraform init` in your Terraform project directory.

2. **Run the Scraper**:

```sh
terrarium harvest modules --dir <path-to-terraform-directory>
```

Replace `<path-to-terraform-directory>` with the actual path to your Terraform project.

#### Additional Flags

- **Enable Local Modules** (`--enable-local-modules`): Include locally referenced modules in the scraping process, using the project directory path as their namespace. This is a boolean flag.

### Using a Module List File

To scrape modules using a module list file:

1. **Prepare a Module List File**: Create a file listing the modules you want to process. Refer to the [module list file documentation](https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md) for the format and details.

2. **Run the Scraper**:

```sh
terrarium harvest modules --module-list-file <path-to-module-list-file>
```

Replace `<path-to-module-list-file>` with the path to your module list file.

#### Additional Flags

- **Working Directory** (`--workdir`): Specify a directory for storing module sources. This improves performance by reusing data across the commands run for the modules in the list file.

### Monitoring Execution

Monitor the scraper's execution through the console output. It will provide progress messages and any errors encountered during the scraping process.
25 changes: 19 additions & 6 deletions src/cli/cmd/harvest/resources/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,33 @@ var (
flagWorkDir string
)

const DefaultSchemaPath = ".terraform/providers/schema.json"

func NewCmd() *cobra.Command {
cmd = &cobra.Command{
Use: "resources",
Aliases: []string{"res"},
Short: "Harvests Terraform resources and attributes using the provider schema json",
Short: "Harvests Terraform providers, resource types, and resource attributes",
Long: heredoc.Docf(`
Harvests Terraform resources and attributes using the provider schema json.
Harvests Terraform providers, resource types, and resource attributes.
This command operates in two modes:
1. Using a pre-generated provider schema JSON file.
2. Using a module list file, where 'terraform init' and 'terraform providers schema -json'
are executed automatically for multiple given modules.
This command requires terraform provider schema already generated. To do that, run:
For the first mode, ensure the provider schema JSON file is generated using:
terraform init && terraform providers schema -json > %s
In the second mode, only specify a module list file, and the necessary Terraform commands
are run internally to generate the required data.
`, DefaultSchemaPath),
RunE: cmdRunE,
}

cmd.Flags().StringVarP(&flagSchemaFile, "schema-file", "s", DefaultSchemaPath, "terraform provider schema json file path")
cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "list file of modules to process")
cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "store all module sources in this directory; improves performance by reusing data between harvest commands")
cmd.Flags().StringVarP(&flagSchemaFile, "schema-file", "s", DefaultSchemaPath, "Path to the Terraform provider schema JSON file. Use this for the first mode of operation.")
cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "Path to a file listing modules to process. In this mode, 'terraform init' and 'terraform providers schema -json' are executed automatically. More details at https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md")
cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "Directory for storing module sources. Using a workdir improves performance by reusing data between harvesting multiple modules. This flag should be used in conjunction with 'module-list-file'.")

return cmd
}
Expand All @@ -53,11 +63,14 @@ func cmdRunE(cmd *cobra.Command, _ []string) error {
return eris.Wrapf(err, "error connecting to the database")
}

// First mode - using schema file
if flagModuleListFile == "" {
fmt.Fprintf(cmd.OutOrStdout(), "Loading modules from the provider schema JSON file at '%s'...\n", flagSchemaFile)
return loadFrom(g, flagSchemaFile)
}

// Second mode using module list file

fmt.Fprintf(cmd.OutOrStdout(), "Loading modules from modules list YAML file '%s'...\n", flagModuleListFile)
moduleList, err := modulelist.LoadFarmModules(flagModuleListFile)
if err != nil {
Expand Down
Loading

3 comments on commit 5dc63c5

@26christy
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Job Name cldcvr/terrarium integration pipeline
Job Status failed
Total Steps 2
Progress 1
Message An unexpected error occurred: build has finished with status: Failure
View on Code Pipes

@26christy
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Job Name test integration pipeline
Job Status failed
Total Steps 2
Progress 1
Message An unexpected error occurred: build has finished with status: Failure
View on Code Pipes

@26christy
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Job Name cldcvr/terrarium integration pipeline
Job Status failed
Total Steps 2
Progress 1
Message An unexpected error occurred: build has finished with status: Failure
View on Code Pipes

Please sign in to comment.