TER-405 add doc and help for tf-module harvest (#163)

document these commands, and update command help text: - harvest resources - harvest modules - harvest mappings Also, update table of contents
nonfx · Dec 12, 2023 · 5dc63c5 · 5dc63c5 · 26christy · Dec 12, 2023
1 parent 5622be1
commit 5dc63c5
Show file tree

Hide file tree

Showing 12 changed files with 341 additions and 51 deletions.
diff --git a/Makefile b/Makefile
@@ -201,6 +201,10 @@ farm-release-pull:
 	fi; \
 	curl -L -o data/$(FARM_DB_DUMP_FILE) "$$DB_DUMP_URL"
 
+.PHONY: toc
+toc:
+	./scripts/doc_tree.sh
+
 .PHONY: help
 help:
 	@grep -hE '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

diff --git a/TOC.md b/TOC.md
@@ -1,13 +1,23 @@
-# Table of contents
 
 * [CONTRIBUTING.md](./CONTRIBUTING.md)
 * [SUPPORT.md](./SUPPORT.md)
 * [TOC.md](./TOC.md)
 * [readme.md](./readme.md)
 * [setup.md](./setup.md)
 * [.github/workflows/README.md](./.github/workflows/README.md)
+* [examples/cldcvr-modules-platform/FAQs.md](./examples/cldcvr-modules-platform/FAQs.md)
+* [examples/cldcvr-modules-platform/platform.md](./examples/cldcvr-modules-platform/platform.md)
+* [examples/cldcvr-modules-platform/readme.md](./examples/cldcvr-modules-platform/readme.md)
+* [examples/cldcvr-modules-platform/terraform.md](./examples/cldcvr-modules-platform/terraform.md)
+* [examples/cldcvr-modules-platform/tf-without-platform/resources.md](./examples/cldcvr-modules-platform/tf-without-platform/resources.md)
 * [examples/farm/readme.md](./examples/farm/readme.md)
+* [examples/lz/readme.md](./examples/lz/readme.md)
+* [examples/lz/platforms/readme.md](./examples/lz/platforms/readme.md)
+* [examples/lz/requirements/readme.md](./examples/lz/requirements/readme.md)
+* [examples/platform-demo/readme.md](./examples/platform-demo/readme.md)
 * [examples/platform/readme.md](./examples/platform/readme.md)
+* [getting_started/developer_perspective.md](./getting_started/developer_perspective.md)
+* [getting_started/devops_perspective.md](./getting_started/devops_perspective.md)
 * [readme/public_doc/CLI](./REFERENCE/cli.md)
 * [readme/public_doc/CLI](./REFERENCE/cli.md)
 * [readme/public_doc/FARM/terrarium-farm.md](./readme/public_doc/FARM/terrarium-farm.md)
@@ -20,11 +30,13 @@
 * [readme/public_doc/PLATFORM/components.md](./readme/public_doc/PLATFORM/components.md)
 * [readme/public_doc/PLATFORM/dependency-interface.md](./readme/public_doc/PLATFORM/dependency-interface.md)
 * [readme/public_doc/PLATFORM/platform.md](./readme/public_doc/PLATFORM/platform.md)
+* [src/cli/cmd/harvest/mappings/readme.md](./src/cli/cmd/harvest/mappings/readme.md)
+* [src/cli/cmd/harvest/modules/readme.md](./src/cli/cmd/harvest/modules/readme.md)
 * [src/cli/cmd/harvest/resources/readme.md](./src/cli/cmd/harvest/resources/readme.md)
 * [src/pkg/confighelper/readme.md](./src/pkg/confighelper/readme.md)
 * [src/pkg/db/readme.md](./src/pkg/db/readme.md)
 * [src/pkg/metadata/app/readme.md](./src/pkg/metadata/app/readme.md)
-* [src/pkg/metadata/cli/readme.md](./src/pkg/metadata/cli/readme.md)
 * [src/pkg/metadata/dependency/readme.md](./src/pkg/metadata/dependency/readme.md)
+* [src/pkg/metadata/modulelist/readme.md](./src/pkg/metadata/modulelist/readme.md)
 * [src/pkg/metadata/platform/readme.md](./src/pkg/metadata/platform/readme.md)
 * [src/pkg/transporthelper/readme.md](./src/pkg/transporthelper/readme.md)
diff --git a/scripts/doc_tree.sh b/scripts/doc_tree.sh
@@ -2,7 +2,8 @@
 # Copyright (c) Ollion
 # SPDX-License-Identifier: Apache-2.0
 
-echo "# Table of contents\n" > TOC.md
+echo "# Table of contents" > TOC.md
+echo "" >> TOC.md  # append (>>) so the heading written above is kept; '>' would truncate it
 rsync -avn . /dev/shm --exclude-from .gitignore --exclude-from .git/info/exclude | grep "\.md$" | while IFS= read -r line; do
   printf "* [%s](./%s)\n"  $line $line >> TOC.md
 done;
diff --git a/src/cli/cmd/harvest/mappings/cmd.go b/src/cli/cmd/harvest/mappings/cmd.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"path/filepath"
 
+	"github.com/MakeNowJust/heredoc/v2"
 	"github.com/charmbracelet/log"
 	"github.com/cldcvr/terraform-config-inspect/tfconfig"
 	"github.com/cldcvr/terrarium/src/cli/internal/config"
@@ -29,13 +30,17 @@ func NewCmd() *cobra.Command {
 	cmd = &cobra.Command{
 		Use:   "mappings",
 		Short: "Scrapes resource attribute mappings from the terraform directory",
-		Long:  "The 'mappings' command scrapes resource attribute mappings from the specified terraform directory.",
-		RunE:  cmdRunE,
+		Long: heredoc.Doc(
+			`The 'mappings' command scrapes resource attribute mappings from the specified terraform directory.
+			It parses Terraform code and its modules to find mappings between input and output resource attributes,
+			such as linking an input attribute of one resource to an output attribute of another.
+		`),
+		RunE: cmdRunE,
 	}
 
-	cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "terraform directory path")
-	cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "list file of modules to process")
-	cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "store all module sources in this directory; improves performance by reusing data between harvest commands")
+	cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "Path to the Terraform directory")
+	cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "Path to a file listing modules to process. In this mode, 'terraform init' and 'terraform providers schema -json' are executed automatically. More details at https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md")
+	cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "Directory for storing module sources. Using a workdir improves performance by reusing data between harvesting multiple modules. This flag should be used in conjunction with 'module-list-file'.")
 
 	return cmd
 }

diff --git a/src/cli/cmd/harvest/mappings/readme.md b/src/cli/cmd/harvest/mappings/readme.md
@@ -0,0 +1,89 @@
+# Terraform Mappings Harvester
+
+The Terraform Mappings Harvester is a specialized tool designed to parse Terraform code and its modules to identify and scrape resource attribute mappings.
+
+## Working
+
+```mermaid
+flowchart TD
+  TC["Terraform HCL code"] --"terraform init"--> T[".terraform/modules"]
+  T --> MC["Inner Module code\n.terraform/modules/module_name/*.tf"]
+  MC --> MH["Mappings Harvester"]
+  MH --> DB["Database\ntf_resource_attribute_mappings"]
+```
+
+The Terraform Mappings Harvester operates through a series of steps to identify and extract resource attribute mappings from Terraform code. The process, as visualized in the above Mermaid diagram, involves the following key stages:
+
+1. **Terraform HCL Code Processing**:
+   - The process begins with the Terraform HCL (HashiCorp Configuration Language) code.
+   - The user runs `terraform init` on their Terraform code. This command is crucial for initializing a Terraform working directory, which includes downloading and setting up the necessary modules.
+
+2. **Extraction of Inner Module Code**:
+   - The initialization process creates a `.terraform/modules` directory.
+   - Within this directory, the Harvester focuses on the inner module code, which is located in `.terraform/modules/module_name/*.tf`. These files contain the actual Terraform code for each module used in the project.
+   - This step is critical as it involves parsing the Terraform files to identify the resource attribute mappings.
+
+3. **Mappings Harvester Processing**:
+   - The Mappings Harvester then processes the extracted module code.
+   - It systematically scans through the `.tf` files, identifying and extracting mappings between input and output resource attributes. For example, it can detect a mapping like `aws_security_group.vpc_id = aws_vpc.id`, indicating that the `vpc_id` attribute of `aws_security_group` maps to the `id` attribute of `aws_vpc`, by analyzing a code block like this:
+
+    ```tf
+    resource "aws_security_group" "dbsg" {
+      name        = "db"
+      description = "security group for db"
+      vpc_id      = aws_vpc.demo.id
+    }
+    ```
+
+4. **Storing in Database**:
+   - After processing, the Harvester stores the identified mappings in a database.
+   - The database contains a table like `tf_resource_attribute_mappings`, where the mappings between resource attributes are recorded.
+   - This structured storage allows for efficient querying and analysis of the resource attribute mappings.
+
+## Usage
+
+### Command Syntax
+
+To use the Terraform Mappings Harvester, run the following command:
+
+```sh
+terrarium harvest mappings --dir <path-to-terraform-directory>
+```
+
+Replace `<path-to-terraform-directory>` with the actual path to your Terraform project.
+
+#### Additional Flags
+
+- **Module List File** (`--module-list-file`): Specify a file listing the modules to be processed. This is useful for analyzing multiple modules.
+
+- **Working Directory** (`--workdir`): Define a directory for storing module sources. This improves performance by reusing data between commands, especially beneficial when processing multiple modules.
+
+### Direct Scraping
+
+To scrape mappings directly from a Terraform directory:
+
+1. **Run the Harvester**:
+
+    ```sh
+    terrarium harvest mappings --dir <path-to-terraform-directory>
+    ```
+
+    This command will parse the Terraform code in the specified directory and identify resource attribute mappings.
+
+### Using a Module List File
+
+To scrape mappings using a module list file:
+
+1. **Prepare a Module List File**: Create a file listing the modules you want to process. Refer to the [module list file documentation](https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md) for the format and details.
+
+2. **Run the Harvester**:
+
+    ```sh
+    terrarium harvest mappings --module-list-file <path-to-module-list-file>
+    ```
+
+    This command will process only the modules specified in the module list file.
+
+## Monitoring Execution
+
+Monitor the harvester's execution through the console output. It will provide progress messages and any errors encountered during the scraping process.
diff --git a/src/cli/cmd/harvest/modules/cmd.go b/src/cli/cmd/harvest/modules/cmd.go
@@ -33,19 +33,26 @@ func NewCmd() *cobra.Command {
 	cmd = &cobra.Command{
 		Use:     "modules",
 		Aliases: []string{"mo"},
-		Short:   "Scrapes Terraform modules and attributes from the terraform directory",
+		Short:   "Harvests Terraform modules and attributes into the database",
 		Long: heredoc.Doc(`
-			The 'modules' command scrapes all Terraform modules and its attributes from the specified terraform directory.
+			The 'modules' command harvests all Terraform modules and their attributes into the database.
 
-			Prerequisite: Run "terraform init" in the directory before using this command.
+			This command can operate in two modes:
+			1. Direct scraping from a specified Terraform directory.
+			2. Processing a list of modules specified in a module list file.
+
+			For direct scraping, ensure to run "terraform init" in the Terraform directory before using this command.
+			When using a module list file, the command processes only the specified modules.
+
+			Additional flags allow including local modules and specifying a working directory for improved performance.
 		`),
 		RunE: cmdRunE,
 	}
 
-	cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "terraform directory path")
-	cmd.Flags().BoolVarP(&flagIncludeLocal, "enable-local-modules", "l", false, "A boolean flag to control include/exclude of local modules")
-	cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "list file of modules to process")
-	cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "store all module sources in this directory; improves performance by reusing data between harvest commands")
+	cmd.Flags().StringVarP(&flagTFDir, "dir", "d", ".", "Path to the Terraform directory")
+	cmd.Flags().BoolVarP(&flagIncludeLocal, "enable-local-modules", "l", false, "Include local modules in the scraping process")
+	cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "Path to a file listing modules to process. In this mode, 'terraform init' and 'terraform providers schema -json' are executed automatically. More details at https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md")
+	cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "Directory for storing module sources. Using a workdir improves performance by reusing data between harvesting multiple modules. This flag should be used in conjunction with 'module-list-file'.")
 
 	return cmd
 }

diff --git a/src/cli/cmd/harvest/modules/readme.md b/src/cli/cmd/harvest/modules/readme.md
@@ -0,0 +1,90 @@
+# Terraform Modules Harvester
+
+The Terraform Modules Harvester is a tool designed to scrape Terraform modules and their attributes from a specified directory or a list of modules.
+
+## Working
+
+```mermaid
+flowchart TD
+  TC["Terraform HCL code"] --"terraform init"--> T[".terraform/modules"]
+  T --> MI["Module names, sources, versions\n.terraform/modules/modules.json"]
+  T --> MA["Module attributes\n.terraform/modules/module_name/*.tf"]
+  MI --> MH["Module Harvester"]
+  MA --> MH
+  MH --> DB["Database\ntf_modules\ntf_module_attributes"]
+```
+
+The Terraform Modules Harvester operates through a series of steps to extract information about Terraform modules and their attributes. The process is visualized in the above Mermaid flowchart and can be described as follows:
+
+1. **Terraform HCL Code Processing**:
+   - The process begins with the Terraform HCL (HashiCorp Configuration Language) code.
+   - The user runs `terraform init` on their Terraform code. This command initializes a Terraform working directory by downloading and installing the necessary providers and modules.
+
+2. **Extraction of Module Information**:
+   - The initialization process creates a `.terraform/modules` directory.
+   - Within this directory, two key sources of information are generated:
+     - **Module Information File (`modules.json`)**: This file, located at `.terraform/modules/modules.json`, contains details about module names, sources, and versions. It acts as a central repository of metadata about the modules used in the Terraform code.
+     - **Module Attributes**: For each module, there are HCL files (`.tf` files) within the module's directory (e.g., `.terraform/modules/module_name/*.tf`). These files hold the specific attributes and configurations of each module.
+
+3. **Module Harvester Processing**:
+   - The Module Harvester tool processes the information extracted from the `.terraform/modules` directory.
+   - It reads both the module information from `modules.json` and the module attributes from the individual `.tf` files.
+   - This step translates the raw data from the Terraform configuration and module files into structured information.
+
+4. **Storing in Database**:
+   - Once the Module Harvester has processed the information, it stores the results in a database.
+   - The database consists of tables like `tf_modules` and `tf_module_attributes`, where the harvested data about modules and their attributes is organized and stored for further use or analysis.
+
+## Usage
+
+### Modes of Operation
+
+The scraper can be operated in two distinct modes:
+
+1. **Direct Scraping from Terraform Directory**: This mode directly scrapes modules from a specified Terraform directory.
+
+2. **Using a Module List File**: This mode processes modules listed in a provided module list file.
+
+### Prerequisites
+
+- **Terraform**: Ensure Terraform is installed and accessible.
+
+### Direct Scraping
+
+To scrape modules directly from a Terraform directory:
+
+1. **Initialize Terraform**: Ensure you have run `terraform init` in your Terraform project directory.
+
+2. **Run the Scraper**:
+
+    ```sh
+    terrarium harvest modules --dir <path-to-terraform-directory>
+    ```
+
+    Replace `<path-to-terraform-directory>` with the actual path to your Terraform project.
+
+#### Additional Flags
+
+- **Enable Local Modules** (`--enable-local-modules`): Include locally referenced modules in the scraping process, using the project directory path as the namespace. This is a boolean flag.
+
+### Using a Module List File
+
+To scrape modules using a module list file:
+
+1. **Prepare a Module List File**: Create a file listing the modules you want to process. Refer to the [module list file documentation](https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md) for the format and details.
+
+2. **Run the Scraper**:
+
+    ```sh
+    terrarium harvest modules --module-list-file <path-to-module-list-file>
+    ```
+
+    Replace `<path-to-module-list-file>` with the path to your module list file.
+
+#### Additional Flags
+
+- **Working Directory** (`--workdir`): Specify a directory for storing module sources. This improves performance by reusing data across the commands run for the multiple modules in the list file.
+
+### Monitoring Execution
+
+Monitor the scraper's execution through the console output. It will provide progress messages and any errors encountered during the scraping process.
diff --git a/src/cli/cmd/harvest/resources/cmd.go b/src/cli/cmd/harvest/resources/cmd.go
@@ -25,23 +25,33 @@ var (
 	flagWorkDir        string
 )
 
+const DefaultSchemaPath = ".terraform/providers/schema.json"
+
 func NewCmd() *cobra.Command {
 	cmd = &cobra.Command{
 		Use:     "resources",
 		Aliases: []string{"res"},
-		Short:   "Harvests Terraform resources and attributes using the provider schema json",
+		Short:   "Harvests Terraform providers, resource types, and resource attributes",
 		Long: heredoc.Docf(`
-			Harvests Terraform resources and attributes using the provider schema json.
+			Harvests Terraform providers, resource types, and resource attributes.
+
+			This command operates in two modes:
+			1. Using a pre-generated provider schema JSON file.
+			2. Using a module list file, where 'terraform init' and 'terraform providers schema -json'
+			   are executed automatically for multiple given modules.
 
-			This command requires terraform provider schema already generated. To do that, run:
+			For the first mode, ensure the provider schema JSON file is generated using:
 				terraform init && terraform providers schema -json > %s
+
+			In the second mode, only specify a module list file, and the necessary Terraform commands
+			are run internally to generate the required data.
 		`, DefaultSchemaPath),
 		RunE: cmdRunE,
 	}
 
-	cmd.Flags().StringVarP(&flagSchemaFile, "schema-file", "s", DefaultSchemaPath, "terraform provider schema json file path")
-	cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "list file of modules to process")
-	cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "store all module sources in this directory; improves performance by reusing data between harvest commands")
+	cmd.Flags().StringVarP(&flagSchemaFile, "schema-file", "s", DefaultSchemaPath, "Path to the Terraform provider schema JSON file. Use this for the first mode of operation.")
+	cmd.Flags().StringVarP(&flagModuleListFile, "module-list-file", "f", "", "Path to a file listing modules to process. In this mode, 'terraform init' and 'terraform providers schema -json' are executed automatically. More details at https://github.com/cldcvr/terrarium/blob/main/src/pkg/metadata/modulelist/readme.md")
+	cmd.Flags().StringVarP(&flagWorkDir, "workdir", "w", "", "Directory for storing module sources. Using a workdir improves performance by reusing data between harvesting multiple modules. This flag should be used in conjunction with 'module-list-file'.")
 
 	return cmd
 }
@@ -53,11 +63,14 @@ func cmdRunE(cmd *cobra.Command, _ []string) error {
 		return eris.Wrapf(err, "error connecting to the database")
 	}
 
+	// First mode - using schema file
 	if flagModuleListFile == "" {
 		fmt.Fprintf(cmd.OutOrStdout(), "Loading modules from the provider schema JSON file at '%s'...\n", flagSchemaFile)
 		return loadFrom(g, flagSchemaFile)
 	}
 
+	// Second mode using module list file
+
 	fmt.Fprintf(cmd.OutOrStdout(), "Loading modules from modules list YAML file '%s'...\n", flagModuleListFile)
 	moduleList, err := modulelist.LoadFarmModules(flagModuleListFile)
 	if err != nil {
Job Name	cldcvr/terrarium integration pipeline
Job Status	failed
Total Steps	2
Progress	1
Message	An unexpected error occurred: build has finished with status: Failure
Job Name	test integration pipeline
Job Status	failed
Total Steps	2
Progress	1
Message	An unexpected error occurred: build has finished with status: Failure