diff --git a/README.md b/README.md
index a9870a2..fba1044 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![DOI](https://zenodo.org/badge/453066827.svg)](https://zenodo.org/badge/latestdoi/453066827)
 [![License: GPL-3.0](https://badgen.net/github/license/sr-lab/GLITCH)](https://www.gnu.org/licenses/gpl-3.0)
-[![Python Version](https://img.shields.io/badge/python-3.9+-blue)](https://www.python.org/downloads/)
+[![Python Version](https://img.shields.io/badge/python-3.10+-blue)](https://www.python.org/downloads/)
 [![Last release](https://badgen.net/github/release/sr-lab/GLITCH/)](https://github.com/sr-lab/GLITCH/releases)
 
 ![alt text](https://github.com/sr-lab/GLITCH/blob/main/logo.png?raw=true)
@@ -38,7 +38,8 @@ https://doi.org/10.1109/ASE56229.2023.00162
   title={Polyglot Code Smell Detection for Infrastructure as Code with GLITCH},
   year={2023},
   pages={2042-2045},
-  doi={10.1109/ASE56229.2023.00162}}
+  doi={10.1109/ASE56229.2023.00162}
+}
 ```
 
 ## Installation
diff --git a/glitch/__main__.py b/glitch/__main__.py
index 61d9471..db97492 100644
--- a/glitch/__main__.py
+++ b/glitch/__main__.py
@@ -1,10 +1,11 @@
 import json
+import tqdm
 import click, os, sys
 
 from pathlib import Path
-from typing import Tuple, List, Set, Optional
+from typing import Tuple, List, Set, Optional, TextIO, Dict
 from glitch.analysis.rules import Error, RuleVisitor
-from glitch.helpers import RulesListOption, get_smell_types, get_smells
+from glitch.helpers import get_smell_types, get_smells
 from glitch.parsers.docker import DockerParser
 from glitch.stats.print import print_stats
 from glitch.stats.stats import FileStats
@@ -16,8 +17,10 @@ from glitch.parsers.puppet import PuppetParser
 from glitch.parsers.terraform import TerraformParser
 from glitch.parsers.gha import GithubActionsParser
+from glitch.exceptions import throw_exception
 
 from pkg_resources import resource_filename
-from alive_progress import alive_bar  # type: ignore
+from copy import deepcopy
+from concurrent.futures import ThreadPoolExecutor, Future, as_completed
 
 
 # NOTE: These are necessary in order for python to load the visitors.
@@ -26,21 +29,87 @@ from glitch.analysis.security import SecurityVisitor # type: ignore -def parse_and_check( +def __parse_and_check( type: UnitBlockType, path: str, module: bool, parser: Parser, analyses: List[RuleVisitor], - errors: List[Error], stats: FileStats, -) -> None: +) -> Set[Error]: + errors: Set[Error] = set() inter = parser.parse(path, type, module) + # Avoids problems with multiple threads (and possibly multiple files) + # sharing the same object + analyses = deepcopy(analyses) + if inter != None: for analysis in analyses: - errors += analysis.check(inter) + errors.update(analysis.check(inter)) stats.compute(inter) + return errors + + +def __print_errors(errors: Set[Error], f: TextIO, linter: bool, csv: bool) -> None: + errors_sorted = sorted(errors, key=lambda e: (e.path, e.line, e.code)) + if linter: + for error in errors_sorted: + print(Error.ALL_ERRORS[error.code] + "," + error.to_csv(), file=f) + elif csv: + for error in errors_sorted: + print(error.to_csv(), file=f) + else: + for error in errors_sorted: + print(error, file=f) + + +def __get_parser(tech: Tech) -> Parser: + if tech == Tech.ansible: + return AnsibleParser() + elif tech == Tech.chef: + return ChefParser() + elif tech == Tech.puppet: + return PuppetParser() + elif tech == Tech.docker: + return DockerParser() + elif tech == Tech.terraform: + return TerraformParser() + elif tech == Tech.gha: + return GithubActionsParser() + else: + raise ValueError(f"Invalid tech: {tech}") + + +def __get_paths_and_title( + folder_strategy: str, path: str, tech: Tech +) -> Tuple[Set[str], str]: + paths: Set[str] = set() + title = "" + + if folder_strategy == "dataset": + paths = set([f.path for f in os.scandir(f"{path}") if f.is_dir()]) + title = "ANALYZING SUBFOLDERS" + elif folder_strategy == "include-all": + extensions = tech.extensions + for root, _, files in os.walk(path): + for name in files: + name_split = name.split(".") + if ( + name_split[-1] in extensions + and not Path(os.path.join(root, name)).is_symlink() + ): + paths.add(os.path.join(root, name)) + title = f"ANALYZING ALL FILES WITH EXTENSIONS {extensions}" + elif folder_strategy == "project": + paths.add(path) + title = "ANALYZING PROJECT" + elif folder_strategy == "module": + paths.add(path) + title = "ANALYZING MODULE" + + return paths, title + def repr_mode( type: UnitBlockType, @@ -58,94 +127,100 @@ def repr_mode( ) @click.option( "--tech", - type=click.Choice([t.value for t in Tech]), + type=click.Choice([t.tech for t in Tech]), required=True, - help="The IaC technology in which the scripts analyzed are written in.", + help="The IaC technology to be considered.", ) @click.option( - "--tableformat", + "--table-format", type=click.Choice(("prettytable", "latex")), required=False, default="prettytable", - help="The presentation format of the tables that show stats about the run.", + help="The presentation format of the tables that summarize the run.", ) @click.option( "--type", type=click.Choice([t.value for t in UnitBlockType]), default=UnitBlockType.unknown, - help="The type of scripts being analyzed.", + help="The type of the scripts being analyzed.", ) @click.option( "--config", type=click.Path(), default="configs/default.ini", - help="The path for a config file. Otherwise the default config will be used.", + help="The path for a config file. Otherwise the default config is used.", ) @click.option( - "--module", - is_flag=True, - default=False, - help="Use this flag if the folder you are going to analyze is a module (e.g. 
Chef cookbook).", -) -@click.option( - "--dataset", - is_flag=True, - default=False, - help="Use this flag if the folder being analyzed is a dataset. A dataset is a folder with subfolders to be analyzed.", + "--folder-strategy", + type=click.Choice(["project", "module", "dataset", "include-all"]), + default="project", + help="The method used to handle the folder (if the path is a folder). " + "If 'project', the folder is parsed and analyzed as a Project construct. " + "If 'module', the folder is parsed and analyzed as a Module construct. " + "If 'dataset', each subfolder in the root folder is analyzed as a Project construct. " + "If 'include-all', all files inside the folder and its subfolders, and which extensions correspond to the technology being considered, are analyzed individually" + " (e.g. .yml and .yaml files for Ansible). " + "Defaults to 'project'.", ) @click.option( "--linter", is_flag=True, default=False, - help="This flag changes the output to be more usable for other interfaces, such as, extensions for code editors.", -) -@click.option( - "--includeall", - multiple=True, - help="Some files are ignored when analyzing a folder. For instance, sometimes only some" - "folders in the folder structure are considered. Use this option if" - "you want to analyze all the files with a certain extension inside a folder. (e.g. --includeall yml)" - "This flag is only relevant if you are using the dataset flag.", + help="Changes the output to be more usable for other interfaces, such as, extensions for code editors.", ) @click.option( "--csv", is_flag=True, default=False, - help="Use this flag if you want the output to be in CSV format.", + help="Changes the output to CSV format.", ) @click.option( - "--smell_types", - cls=RulesListOption, + "--smell-types", + type=click.Choice(get_smell_types(), case_sensitive=False), multiple=True, help="The type of smell_types being analyzed.", ) -# TODO: Add linter option to here @click.option( "--mode", type=click.Choice(["smell_detector", "repr"]), - help="The mode the tool is running in. If the mode is 'repr', the tool will only print the intermediate representation." - "The default mode is 'smell_detector'.", + help="The mode the tool is running in. If the mode is 'repr', the output is the intermediate representation." + "Defaults to 'smell_detector'.", default="smell_detector", ) +@click.option( + "--n-workers", + type=int, + help="Number of parallel workers to use. 
Defaults to 1.", + default=1, +) @click.argument("path", type=click.Path(exists=True), required=True) @click.argument("output", type=click.Path(), required=False) def glitch( - tech: str, + tech: str, # type: ignore type: str, path: str, + folder_strategy: str, config: str, - module: bool, csv: bool, - dataset: bool, - includeall: Tuple[str, ...], smell_types: Tuple[str, ...], output: Optional[str], - tableformat: str, + table_format: str, linter: bool, mode: str, + n_workers: int, ): - tech = Tech(tech) + for t in Tech: + if t.tech == tech: + tech: Tech = t + break + else: + raise click.BadOptionUsage( + "tech", + f"Invalid value for 'tech': '{tech}' is not a valid technology.", + ) + type = UnitBlockType(type) + module = folder_strategy == "module" if config != "configs/default.ini" and not os.path.exists(config): raise click.BadOptionUsage( @@ -158,20 +233,9 @@ def glitch( elif config == "configs/default.ini": config = resource_filename("glitch", "configs/default.ini") - parser = None - if tech == Tech.ansible: - parser = AnsibleParser() - elif tech == Tech.chef: - parser = ChefParser() - elif tech == Tech.puppet: - parser = PuppetParser() - elif tech == Tech.docker: - parser = DockerParser() - elif tech == Tech.terraform: - parser = TerraformParser() + parser = __get_parser(tech) + if tech == Tech.terraform: config = resource_filename("glitch", "configs/terraform.ini") - elif tech == Tech.gha: - parser = GithubActionsParser() file_stats = FileStats() if mode == "repr": @@ -190,72 +254,39 @@ def glitch( analyses.append(analysis) errors: List[Error] = [] - if dataset: - if includeall != (): - iac_files: Set[str] = set() - for root, _, files in os.walk(path): - for name in files: - name_split = name.split(".") - if ( - name_split[-1] in includeall - and not Path(os.path.join(root, name)).is_symlink() - ): - iac_files.add(os.path.join(root, name)) - - with alive_bar( - len(iac_files), - title=f"ANALYZING ALL FILES WITH EXTENSIONS {includeall}", - ) as bar: # type: ignore - for file in iac_files: - parse_and_check( - type, file, module, parser, analyses, errors, file_stats - ) - bar() - else: - subfolders = [f.path for f in os.scandir(f"{path}") if f.is_dir()] - with alive_bar(len(subfolders), title="ANALYZING SUBFOLDERS") as bar: # type: ignore - for d in subfolders: - parse_and_check( - type, d, module, parser, analyses, errors, file_stats - ) - bar() - - files = [f.path for f in os.scandir(f"{path}") if f.is_file()] - - with alive_bar(len(files), title="ANALYZING FILES IN ROOT FOLDER") as bar: # type: ignore - for file in files: - parse_and_check( - type, file, module, parser, analyses, errors, file_stats - ) - bar() - else: - parse_and_check(type, path, module, parser, analyses, errors, file_stats) - - errors = sorted(set(errors), key=lambda e: (e.path, e.line, e.code)) + paths: Set[str] + title: str + paths, title = __get_paths_and_title(folder_strategy, path, tech) + futures: List[Future[Set[Error]]] = [] + future_to_path: Dict[Future[Set[Error]], str] = {} + executor = ThreadPoolExecutor(max_workers=n_workers) - if output is None: - f = sys.stdout - else: - f = open(output, "w") - - if linter: - for error in errors: - print(Error.ALL_ERRORS[error.code] + "," + error.to_csv(), file=f) - elif csv: - for error in errors: - print(error.to_csv(), file=f) - else: - for error in errors: - print(error, file=f) + for p in paths: + futures.append( + executor.submit( + __parse_and_check, type, p, module, parser, analyses, file_stats + ) + ) + future_to_path[futures[-1]] = p + f = sys.stdout if 
output is None else open(output, "w") + for future in tqdm.tqdm(as_completed(futures), total=len(futures), desc=title): + try: + new_errors = future.result() + errors.extend(new_errors) + __print_errors(new_errors, f, linter, csv) + except: + throw_exception("Unknown Error: {}", future_to_path[future]) if f != sys.stdout: f.close() + if not linter: - print_stats(errors, get_smells(smell_types, tech), file_stats, tableformat) + print_stats(errors, get_smells(smell_types, tech), file_stats, table_format) def main() -> None: glitch(prog_name="glitch") -main() +if __name__ == "__main__": + main() diff --git a/glitch/analysis/design/duplicate_block.py b/glitch/analysis/design/duplicate_block.py index 36031e0..1def602 100644 --- a/glitch/analysis/design/duplicate_block.py +++ b/glitch/analysis/design/duplicate_block.py @@ -46,7 +46,7 @@ def check(self, element: CodeElement, file: str) -> List[Error]: error = Error( "design_duplicate_block", element, - element.path, + file, self.code_lines[line - 1], ) error.line = line diff --git a/glitch/analysis/design/imperative_abstraction.py b/glitch/analysis/design/imperative_abstraction.py index 1923886..f58c584 100644 --- a/glitch/analysis/design/imperative_abstraction.py +++ b/glitch/analysis/design/imperative_abstraction.py @@ -28,7 +28,7 @@ def check(self, element: CodeElement, file: str) -> List[Error]: Error( "design_imperative_abstraction", element, - element.path, + file, repr(element), ) ] diff --git a/glitch/analysis/design/improper_alignment.py b/glitch/analysis/design/improper_alignment.py index 9d7bfd2..d1536f2 100644 --- a/glitch/analysis/design/improper_alignment.py +++ b/glitch/analysis/design/improper_alignment.py @@ -14,7 +14,7 @@ def check(self, element: CodeElement, file: str) -> List[Error]: error = Error( "implementation_improper_alignment", element, - element.path, + file, repr(element), ) error.line = i + 1 diff --git a/glitch/analysis/design/long_statement.py b/glitch/analysis/design/long_statement.py index dd2a8d9..b8e77f3 100644 --- a/glitch/analysis/design/long_statement.py +++ b/glitch/analysis/design/long_statement.py @@ -11,9 +11,7 @@ def check(self, element: CodeElement, file: str) -> List[Error]: if isinstance(element, UnitBlock) and element.type != UnitBlockType.block: for i, line in enumerate(self.code_lines): if len(line) > 140: - error = Error( - "implementation_long_statement", element, element.path, line - ) + error = Error("implementation_long_statement", element, file, line) error.line = i + 1 errors.append(error) diff --git a/glitch/analysis/design/too_many_variables.py b/glitch/analysis/design/too_many_variables.py index 7634732..78a0ff4 100644 --- a/glitch/analysis/design/too_many_variables.py +++ b/glitch/analysis/design/too_many_variables.py @@ -25,7 +25,7 @@ def check(self, element: CodeElement, file: str) -> List[Error]: Error( "implementation_too_many_variables", element, - element.path, + file, repr(element), ) ] diff --git a/glitch/analysis/design/unguarded_variable.py b/glitch/analysis/design/unguarded_variable.py index 35e21b3..39ad451 100644 --- a/glitch/analysis/design/unguarded_variable.py +++ b/glitch/analysis/design/unguarded_variable.py @@ -27,7 +27,7 @@ def check(self, element: CodeElement, file: str) -> List[Error]: error = Error( "implementation_unguarded_variable", element, - element.path, + file, string, ) error.line = i + 1 diff --git a/glitch/analysis/design/visitor.py b/glitch/analysis/design/visitor.py index 2e5f0cf..d946535 100644 --- a/glitch/analysis/design/visitor.py +++ 
b/glitch/analysis/design/visitor.py @@ -54,7 +54,7 @@ def check_module(self, m: Module) -> list[Error]: # errors.append(Error('design_unnecessary_abstraction', m, m.path, repr(m))) return errors - def check_unitblock(self, u: UnitBlock) -> List[Error]: + def check_unitblock(self, u: UnitBlock, file: str) -> List[Error]: if u.path != "": with open(u.path, "r") as f: try: @@ -78,33 +78,33 @@ def check_unitblock(self, u: UnitBlock) -> List[Error]: errors: List[Error] = [] # The order is important for au in u.atomic_units: - errors += self.check_atomicunit(au, u.path) + errors += self.check_atomicunit(au, file) for v in u.variables: - errors += self.check_variable(v, u.path) + errors += self.check_variable(v, file) for a in u.attributes: - errors += self.check_attribute(a, u.path) + errors += self.check_attribute(a, file) for d in u.dependencies: - errors += self.check_dependency(d, u.path) + errors += self.check_dependency(d, file) for s in u.statements: - errors += self.check_element(s, u.path) + errors += self.check_element(s, file) for c in u.comments: - errors += self.check_comment(c, u.path) + errors += self.check_comment(c, file) # FIXME Needs to consider more things # if (len(u.statements) == 0 and len(u.atomic_units) == 0 and # len(u.variables) == 0 and len(u.unit_blocks) == 0 and # len(u.attributes) == 0): - # errors.append(Error('design_unnecessary_abstraction', u, u.path, repr(u))) + # errors.append(Error('design_unnecessary_abstraction', u, file, repr(u))) for checker in self.checkers: checker.code_lines = code_lines checker.variables_names = self.variables_names - errors += checker.check(u, u.path) + errors += checker.check(u, file) # The unit blocks inside should only be considered after in order to # have the correct variables for ub in u.unit_blocks: - errors += self.check_unitblock(ub) + errors += self.check_unitblock(ub, file) variable_size = self.variable_stack.pop() if variable_size == 0: diff --git a/glitch/analysis/rules.py b/glitch/analysis/rules.py index 6ea900c..ff8f335 100644 --- a/glitch/analysis/rules.py +++ b/glitch/analysis/rules.py @@ -3,7 +3,8 @@ from glitch.tech import Tech from glitch.repr.inter import * -ErrorDict = Dict[str, Union[Union[Tech, str], "ErrorDict"]] +ErrorValue = Dict[Tech | str, Dict[str, str] | str] +ErrorDict = Dict[str, ErrorValue] class Error: @@ -66,11 +67,13 @@ class Error: @staticmethod def agglomerate_errors() -> None: - def aux_agglomerate_errors(key: str, errors: Union[str, ErrorDict]) -> None: + def aux_agglomerate_errors( + key: Tech | str, errors: Union[str, ErrorDict, ErrorValue, Dict[str, str]] + ) -> None: if isinstance(errors, dict): for k, v in errors.items(): aux_agglomerate_errors(k, v) - else: + elif isinstance(key, str): Error.ALL_ERRORS[key] = errors aux_agglomerate_errors("", Error.ERRORS) @@ -139,7 +142,7 @@ def check(self, code: Project | Module | UnitBlock) -> List[Error]: elif isinstance(code, Module): return self.check_module(code) else: - return self.check_unitblock(code) + return self.check_unitblock(code, code.path) def check_element(self, c: CodeElement, file: str) -> list[Error]: if isinstance(c, AtomicUnit): @@ -177,31 +180,31 @@ def check_project(self, p: Project) -> list[Error]: errors += self.check_module(m) for u in p.blocks: - errors += self.check_unitblock(u) + errors += self.check_unitblock(u, u.path) return errors def check_module(self, m: Module) -> list[Error]: errors: List[Error] = [] for u in m.blocks: - errors += self.check_unitblock(u) + errors += self.check_unitblock(u, u.path) return errors - 
def check_unitblock(self, u: UnitBlock) -> list[Error]: + def check_unitblock(self, u: UnitBlock, file: str) -> list[Error]: errors: List[Error] = [] for au in u.atomic_units: - errors += self.check_atomicunit(au, u.path) + errors += self.check_atomicunit(au, file) for c in u.comments: - errors += self.check_comment(c, u.path) + errors += self.check_comment(c, file) for v in u.variables: - errors += self.check_variable(v, u.path) + errors += self.check_variable(v, file) for ub in u.unit_blocks: - errors += self.check_unitblock(ub) + errors += self.check_unitblock(ub, file) for a in u.attributes: - errors += self.check_attribute(a, u.path) + errors += self.check_attribute(a, file) for s in u.statements: - errors += self.check_element(s, u.path) + errors += self.check_element(s, file) return errors diff --git a/glitch/analysis/security.py b/glitch/analysis/security.py index c5c98d5..e91416c 100644 --- a/glitch/analysis/security.py +++ b/glitch/analysis/security.py @@ -436,24 +436,24 @@ def check_condition(self, c: ConditionalStatement, file: str) -> List[Error]: return errors - def check_unitblock(self, u: UnitBlock) -> List[Error]: - errors = super().check_unitblock(u) + def check_unitblock(self, u: UnitBlock, file: str) -> List[Error]: + errors = super().check_unitblock(u, file) # Missing integrity check changed to unit block since in Docker the integrity check is not an attribute of the # atomic unit but can be done on another atomic unit inside the same unit block. missing_integrity_checks = {} for au in u.atomic_units: - result = self.check_integrity_check(au, u.path) + result = self.check_integrity_check(au, file) if result is not None: missing_integrity_checks[result[0]] = result[1] continue - file = SecurityVisitor.check_has_checksum(au) - if file is not None: - if file in missing_integrity_checks: - del missing_integrity_checks[file] + f = SecurityVisitor.check_has_checksum(au) + if f is not None: + if f in missing_integrity_checks: + del missing_integrity_checks[f] errors += missing_integrity_checks.values() - errors += self.non_off_img.check(u, u.path) + errors += self.non_off_img.check(u, file) return errors diff --git a/glitch/exceptions.py b/glitch/exceptions.py index 26f659f..f7da3b3 100644 --- a/glitch/exceptions.py +++ b/glitch/exceptions.py @@ -1,4 +1,7 @@ import sys +import traceback + +from typing import List EXCEPTIONS = { "ANSIBLE_PLAYBOOK": "Ansible - File is not a playbook: {}", @@ -17,4 +20,10 @@ def throw_exception(exception: str, *args: str) -> None: - print(exception.format(*args), file=sys.stderr) + print("Error:", exception.format(*args), file=sys.stderr) + print("=" * 20 + " Traceback " + "=" * 20, file=sys.stderr) + exc: List[str] = traceback.format_exc().split("\n") + if len(exc) > 40: + exc = exc[:20] + ["..."] + exc[-20:] + print("\n".join(exc), file=sys.stderr) + print("=" * 51, file=sys.stderr) diff --git a/glitch/helpers.py b/glitch/helpers.py index 84396b3..4dffe13 100644 --- a/glitch/helpers.py +++ b/glitch/helpers.py @@ -1,50 +1,8 @@ -import click - -from click.types import ParamType -from typing import Optional, List, Tuple, Iterable, Sequence, Any, Union +from typing import List, Tuple, Iterable from glitch.tech import Tech from glitch.analysis.rules import Error -class RulesListOption(click.Option): - def __init__( - self, - param_decls: Optional[Sequence[str]] = None, - show_default: bool = False, - prompt: bool = False, - confirmation_prompt: bool = False, - hide_input: bool = False, - is_flag: Optional[bool] = None, - flag_value: Optional[Any] = 
None, - multiple: bool = False, - count: bool = False, - allow_from_autoenv: bool = True, - type: Optional[Union[ParamType, Any]] = None, - help: Optional[str] = None, - hidden: bool = False, - show_choices: bool = True, - show_envvar: bool = False, - ) -> None: - super().__init__( - param_decls=param_decls, - show_default=show_default, - prompt=prompt, - confirmation_prompt=confirmation_prompt, - hide_input=hide_input, - is_flag=is_flag, - flag_value=flag_value, - multiple=multiple, - count=count, - allow_from_autoenv=allow_from_autoenv, - type=type, - help=help, - hidden=hidden, - show_choices=show_choices, - show_envvar=show_envvar, - ) - self.type = click.Choice(get_smell_types(), case_sensitive=False) - - def get_smell_types() -> Tuple[str, ...]: """Get list of smell types. diff --git a/glitch/parsers/gha.py b/glitch/parsers/gha.py index 0e441f9..e04cfb7 100644 --- a/glitch/parsers/gha.py +++ b/glitch/parsers/gha.py @@ -49,7 +49,7 @@ def __parse_variable(key: Node, value: Node, lines: List[str]) -> Variable: var = Variable(GithubActionsParser.__get_value(key), var_value, False) if isinstance(var.value, str): var.has_variable = "${{" in var.value - var.line, var.column = key.start_mark.line, key.start_mark.column + var.line, var.column = key.start_mark.line + 1, key.start_mark.column + 1 var.code = GithubActionsParser._get_code(key, value, lines) for child in vars: var.keyvalues.append(child) @@ -70,7 +70,7 @@ def __parse_attribute(key: Node, value: Node, lines: List[str]) -> Attribute: attr = Attribute(GithubActionsParser.__get_value(key), attr_value, False) if isinstance(attr.value, str): attr.has_variable = "${{" in attr.value - attr.line, attr.column = key.start_mark.line, key.start_mark.column + attr.line, attr.column = key.start_mark.line + 1, key.start_mark.column + 1 attr.code = GithubActionsParser._get_code(key, value, lines) for child in attrs: attr.keyvalues.append(child) @@ -79,7 +79,7 @@ def __parse_attribute(key: Node, value: Node, lines: List[str]) -> Attribute: def __parse_job(self, key: Node, value: Node, lines: List[str]) -> UnitBlock: job = UnitBlock(key.value, UnitBlockType.block) - job.line, job.column = key.start_mark.line, key.start_mark.column + job.line, job.column = key.start_mark.line + 1, key.start_mark.column + 1 job.code = GithubActionsParser._get_code(key, value, lines) for attr_key, attr_value in value.value: @@ -93,7 +93,10 @@ def __parse_job(self, key: Node, value: Node, lines: List[str]) -> UnitBlock: au_type = step_dict["uses"] au = AtomicUnit(name, au_type) - au.line, au.column = step.start_mark.line, step.start_mark.column + au.line, au.column = ( + step.start_mark.line + 1, + step.start_mark.column + 1, + ) au.code = GithubActionsParser._get_code(step, step, lines) for key, value in step.value: diff --git a/glitch/parsers/yaml.py b/glitch/parsers/yaml.py index a2f64d0..994234b 100644 --- a/glitch/parsers/yaml.py +++ b/glitch/parsers/yaml.py @@ -31,7 +31,12 @@ def _get_code( for line in range(start_token.start_mark.line + 1, end_token.end_mark.line): res += code[line] - if start_token.start_mark.line != end_token.end_mark.line: + # The end_mark.column > 0 avoids cases where the end token is in the last line + # leading to a index out of range error + if ( + start_token.start_mark.line != end_token.end_mark.line + and end_token.end_mark.column > 0 + ): res += code[end_token.end_mark.line][: end_token.end_mark.column] return res diff --git a/glitch/stats/print.py b/glitch/stats/print.py index a9ff18e..dc0f6da 100644 --- a/glitch/stats/print.py +++ 
b/glitch/stats/print.py @@ -26,13 +26,13 @@ def print_stats( total_smell_density = 0 for code, n in occurrences.items(): total_occur += n - total_smell_density += round(n / (file_stats.loc / 1000), 2) + total_smell_density += round(n / (max(1, file_stats.loc) / 1000), 2) stats_info.append( ( Error.ALL_ERRORS[code], n, - round(n / (file_stats.loc / 1000), 2), - round((len(files_with_the_smell[code]) / total_files) * 100, 2), + round(n / (max(1, file_stats.loc) / 1000), 2), + round((len(files_with_the_smell[code]) / max(1, total_files)) * 100, 2), ) ) stats_info.append( @@ -40,7 +40,9 @@ def print_stats( "Combined", total_occur, total_smell_density, - round((len(files_with_the_smell["Combined"]) / total_files) * 100, 2), + round( + (len(files_with_the_smell["Combined"]) / max(1, total_files)) * 100, 2 + ), ) ) diff --git a/glitch/tech.py b/glitch/tech.py index bb922d5..ad32131 100644 --- a/glitch/tech.py +++ b/glitch/tech.py @@ -1,10 +1,15 @@ from enum import Enum +from typing import List -class Tech(str, Enum): - ansible = "ansible" - chef = "chef" - puppet = "puppet" - terraform = "terraform" - docker = "docker" - gha = "github-actions" +class Tech(Enum): + def __init__(self, tech: str, extensions: List[str]): + self.tech = tech + self.extensions = extensions + + ansible = "ansible", ["yml", "yaml"] + chef = "chef", ["rb"] + puppet = "puppet", ["pp"] + terraform = "terraform", ["tf"] + docker = "docker", ["Dockerfile"] + gha = "github-actions", ["yml", "yaml"] diff --git a/glitch/tests/cli/resources/chef_project/test.rb b/glitch/tests/cli/resources/chef_project/test.rb new file mode 100644 index 0000000..7119b73 --- /dev/null +++ b/glitch/tests/cli/resources/chef_project/test.rb @@ -0,0 +1,12 @@ +{ + "/etc/delivery/#{new_resource.chef_user}.pem" => new_resource.chef_user_pem, + '/etc/chef/validation.pem' => new_resource.validation_pem, +}.each do |file, src| + chef_file file do + sensitive new_resource.sensitive if new_resource.sensitive + source src + user 'root' + group 'root' + mode '0644' + end +end \ No newline at end of file diff --git a/glitch/tests/cli/test_cli.py b/glitch/tests/cli/test_cli.py new file mode 100644 index 0000000..b4cc4d6 --- /dev/null +++ b/glitch/tests/cli/test_cli.py @@ -0,0 +1,84 @@ +import csv +import subprocess +import glitch.__main__ as glitch + +from typing import Callable, Set, Tuple, List +from glitch.tech import Tech +from tempfile import NamedTemporaryFile + + +def test_cli_help(): + run = subprocess.run(["glitch", "--help"], capture_output=True) + assert run.returncode == 0 + + +def test_cli_get_paths(): + __get_paths_and_title: Callable[[str, str, Tech], Tuple[Set[str], str]] = getattr( + glitch, "__get_paths_and_title" + ) + paths, title = __get_paths_and_title( + "module", "tests/cli/resources/chef_project", Tech.chef + ) + assert paths == {"tests/cli/resources/chef_project"} + assert title == "ANALYZING MODULE" + + paths, title = __get_paths_and_title("dataset", "tests/cli/resources/", Tech.chef) + assert paths == {"tests/cli/resources/chef_project"} + assert title == "ANALYZING SUBFOLDERS" + + paths, title = __get_paths_and_title( + "include-all", "tests/cli/resources/chef_project", Tech.chef + ) + assert paths == {"tests/cli/resources/chef_project/test.rb"} + + paths, title = __get_paths_and_title( + "project", "tests/cli/resources/chef_project", Tech.chef + ) + assert paths == {"tests/cli/resources/chef_project"} + + +def test_cli_analyze(): + with NamedTemporaryFile() as f: + run = subprocess.run( + [ + "glitch", + "--tech", + "chef", + 
"--folder-strategy", + "include-all", + "--csv", + "tests/cli/resources/chef_project", + f.name, + ], + capture_output=True, + ) + assert run.returncode == 0 + + rows: List[List[str]] = [] + with open(f.name, "r") as f: + reader = csv.reader(f) + for row in reader: + rows.append(row) + + assert len(rows) == 3 + assert rows[0] == [ + "tests/cli/resources/chef_project/test.rb", + "8", + "sec_def_admin", + "user:'root'", + "-", + ] + assert rows[1] == [ + "tests/cli/resources/chef_project/test.rb", + "8", + "sec_hard_secr", + "user:'root'", + "-", + ] + assert rows[2] == [ + "tests/cli/resources/chef_project/test.rb", + "8", + "sec_hard_user", + "user:'root'", + "-", + ] diff --git a/glitch/tests/design/gha/files/too_many_variables.yml b/glitch/tests/design/gha/files/too_many_variables.yml new file mode 100644 index 0000000..b425a58 --- /dev/null +++ b/glitch/tests/design/gha/files/too_many_variables.yml @@ -0,0 +1,15 @@ +name: Package and Release + +on: + push: + tags: + - '**' + workflow_dispatch: + +jobs: + release: + runs-on: ubuntu-latest + env: + CF_API_KEY: ${{ secrets.CF_API_KEY }} + WAGO_API_TOKEN: ${{ secrets.WAGO_API_TOKEN }} + GITHUB_OAUTH: ${{ secrets.GITHUB_TOKEN }} diff --git a/glitch/tests/design/gha/test_design.py b/glitch/tests/design/gha/test_design.py new file mode 100644 index 0000000..9654958 --- /dev/null +++ b/glitch/tests/design/gha/test_design.py @@ -0,0 +1,45 @@ +import unittest + +from glitch.analysis.design.visitor import DesignVisitor +from glitch.analysis.rules import Error +from glitch.parsers.gha import GithubActionsParser +from glitch.tech import Tech +from glitch.repr.inter import UnitBlockType +from typing import List + + +class TestDesign(unittest.TestCase): + def __help_test( + self, path: str, n_errors: int, codes: List[str], lines: List[int] + ) -> None: + parser = GithubActionsParser() + inter = parser.parse(path, UnitBlockType.script, False) + assert inter is not None + analysis = DesignVisitor(Tech.gha) + analysis.config("configs/default.ini") + errors: List[Error] = list(set(analysis.check(inter))) + errors = list( + filter( + lambda e: e.code.startswith("design_") + or e.code.startswith("implementation_"), + errors, + ) + ) + errors = sorted(errors, key=lambda e: (e.path, e.line, e.code)) + self.assertEqual(len(errors), n_errors) + for i in range(n_errors): + self.assertEqual(errors[i].path, path) + self.assertEqual(errors[i].code, codes[i]) + self.assertEqual(errors[i].line, lines[i]) + + # NOTE: This test also verifies if the paths of errors in inner Unit Blocks + # are correctly reported. 
+ def test_gha_too_many_variables(self) -> None: + self.__help_test( + "tests/design/gha/files/too_many_variables.yml", + 1, + [ + "implementation_too_many_variables", + ], + [10], + ) diff --git a/glitch/tests/parser/gha/files/index_out_of_range.yml b/glitch/tests/parser/gha/files/index_out_of_range.yml new file mode 100644 index 0000000..bd64104 --- /dev/null +++ b/glitch/tests/parser/gha/files/index_out_of_range.yml @@ -0,0 +1,19 @@ +name: Package and Release + +on: + push: + tags: + - '**' + +jobs: + release: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v1 + + - name: test + uses: test + with: + args: test diff --git a/glitch/tests/parser/gha/test_parser.py b/glitch/tests/parser/gha/test_parser.py index 0d962e0..31e50e1 100644 --- a/glitch/tests/parser/gha/test_parser.py +++ b/glitch/tests/parser/gha/test_parser.py @@ -31,6 +31,10 @@ def test_gha_valid_workflow(self) -> None: assert len(ir.attributes[0].keyvalues[0].keyvalues) == 1 assert ir.attributes[0].keyvalues[0].keyvalues[0].name == "branches" assert ir.attributes[0].keyvalues[0].keyvalues[0].value == ["main"] + assert ( + ir.attributes[0].keyvalues[0].keyvalues[0].code + == " branches:\n - main\n " + ) assert ir.attributes[0].keyvalues[1].name == "pull_request" assert ir.attributes[0].keyvalues[1].value is None @@ -138,3 +142,13 @@ def test_gha_valid_workflow_2(self) -> None: assert ir.comments[9].content == "# for actions/checkout to fetch code" assert ir.comments[9].line == 31 + + def test_gha_index_out_of_range(self) -> None: + """ + This file previously gave an index out of range even though it is valid. + """ + p = GithubActionsParser() + ir = p.parse_file( + "tests/parser/gha/files/index_out_of_range.yml", UnitBlockType.script + ) + assert ir is not None diff --git a/poetry.lock b/poetry.lock index 80b0387..b867185 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,31 +1,5 @@ # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. -[[package]] -name = "about-time" -version = "4.2.1" -description = "Easily measure timing and throughput of code blocks, with beautiful human friendly representations." -optional = false -python-versions = ">=3.7, <4" -files = [ - {file = "about-time-4.2.1.tar.gz", hash = "sha256:6a538862d33ce67d997429d14998310e1dbfda6cb7d9bbfbf799c4709847fece"}, - {file = "about_time-4.2.1-py3-none-any.whl", hash = "sha256:8bbf4c75fe13cbd3d72f49a03b02c5c7dca32169b6d49117c257e7eb3eaee341"}, -] - -[[package]] -name = "alive-progress" -version = "3.0.1" -description = "A new kind of Progress Bar, with real-time throughput, ETA, and very cool animations!" 
-optional = false
-python-versions = ">=3.7, <4"
-files = [
-    {file = "alive-progress-3.0.1.tar.gz", hash = "sha256:3245114253b6adb4b38f2a2a1828edfcd9e8c012f7e30a5cef1932ca7344eb44"},
-    {file = "alive_progress-3.0.1-py3-none-any.whl", hash = "sha256:1e910ef0ceccfd2b09682f119330bd9a0bf2b4c4ddfa83de4d3d3741e104d98f"},
-]
-
-[package.dependencies]
-about-time = "4.2.1"
-grapheme = "0.6.0"
-
 [[package]]
 name = "attrs"
 version = "23.2.0"
@@ -245,19 +219,6 @@ files = [
 [package.dependencies]
 lark = ">=1.1.5,<2.0"
 
-[[package]]
-name = "grapheme"
-version = "0.6.0"
-description = "Unicode grapheme helpers"
-optional = false
-python-versions = "*"
-files = [
-    {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
-]
-
-[package.extras]
-test = ["pytest", "sphinx", "sphinx-autobuild", "twine", "wheel"]
-
 [[package]]
 name = "idna"
 version = "3.7"
@@ -1089,4 +1050,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "c343db0e89320116e8a644eb5a74709bb4e6b03c681d075945f989581f55f3ec"
+content-hash = "ec85f05b6e1f20f9903addf406af0151c60ba592ef3bc1582e0f7ea9cc680d38"
diff --git a/pyproject.toml b/pyproject.toml
index dcf53c7..f68b66a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,6 @@ python = "^3.10"
 "ruamel.yaml" = "0.17.21"
 ply = "3.11"
 click = "8.1.7"
-alive-progress = "3.0.1"
 prettytable = "3.6.0"
 pandas = "1.5.3"
 configparser = "5.3.0"
@@ -25,6 +24,7 @@ z3-solver = "^4.12.4.0"
 nltk = "^3.8.1"
 jsonschema = "^4.21.1"
 setuptools = "^69.5.1"
+tqdm = "^4.66.2"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "7.3.1"
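
The `glitch/__main__.py` changes replace the `alive_progress` loops with a `ThreadPoolExecutor`: each path is submitted as a job, results are consumed with `as_completed`, errors are printed as soon as each file finishes, and `tqdm` reports progress. A minimal sketch of that submit/collect pattern, assuming a placeholder `analyze` function and file paths (not GLITCH APIs):

```python
# Sketch only: mirrors the submit/as_completed/tqdm flow used in the patch,
# not the real GLITCH code. analyze() and the paths below are placeholders.
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from typing import Dict, List, Set

import tqdm


def analyze(path: str) -> Set[str]:
    # Placeholder for __parse_and_check: parse one path and return its findings.
    return {f"{path}: ok"}


def run_parallel(paths: List[str], n_workers: int = 1) -> Set[str]:
    results: Set[str] = set()
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        # Remember which path each future belongs to so failures can be
        # reported against the right file, as the patch does with future_to_path.
        future_to_path: Dict[Future[Set[str]], str] = {
            executor.submit(analyze, p): p for p in paths
        }
        for future in tqdm.tqdm(as_completed(future_to_path), total=len(future_to_path)):
            try:
                results.update(future.result())
            except Exception:
                print(f"Unknown Error: {future_to_path[future]}")
    return results


if __name__ == "__main__":
    print(run_parallel(["a.yml", "b.yml"], n_workers=2))
```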
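`glitch/tech.py` now gives each `Tech` member a `(tech, extensions)` tuple value, which the CLI matches against `--tech` and the `include-all` folder strategy uses to select file extensions. A minimal sketch of the tuple-valued `Enum` pattern, trimmed to two members, with an illustrative lookup helper (only the enum shape comes from the patch):

```python
# Sketch only: each member's tuple value is unpacked into __init__,
# exposing Tech.<member>.tech and Tech.<member>.extensions.
from enum import Enum
from typing import List


class Tech(Enum):
    def __init__(self, tech: str, extensions: List[str]):
        self.tech = tech
        self.extensions = extensions

    ansible = "ansible", ["yml", "yaml"]
    terraform = "terraform", ["tf"]


def tech_from_option(value: str) -> Tech:
    # Illustrative helper: the same lookup on .tech that the CLI performs for --tech.
    for t in Tech:
        if t.tech == value:
            return t
    raise ValueError(f"Invalid tech: {value}")


assert tech_from_option("terraform").extensions == ["tf"]
assert Tech.ansible.tech == "ansible"
```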
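In `glitch/stats/print.py`, the reported smell density is occurrences per thousand lines of code; wrapping the denominators in `max(1, ...)` keeps the tables well defined when a run analyzes zero lines or zero files. A small worked example of the guarded computation (the helper name is illustrative, not part of the patch):

```python
# Worked example of the guarded density: occurrences per 1000 lines of code,
# with max(1, loc) avoiding a ZeroDivisionError on empty runs.
def smell_density(occurrences: int, loc: int) -> float:
    return round(occurrences / (max(1, loc) / 1000), 2)


assert smell_density(3, 1200) == 2.5  # 3 smells in 1.2 KLoC
assert smell_density(0, 0) == 0.0     # previously raised ZeroDivisionError when loc == 0
```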