Merge branch 'main' into feature/docker-deploy

henrique-tostes-a3 · henrique-tostes-a3 · commit 6f173ee2a213 · 2024-09-26T14:59:57.000-03:00
diff --git a/.github/workflows/pipeline_lint.yml b/.github/workflows/pipeline_lint.yml
@@ -0,0 +1,38 @@
+name: Pipeline de Qualidade de Código
+on:
+  push:  # lint every push to main ...
+    branches:
+      - main
+  pull_request:  # ... and every pull request targeting main
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Verificar código
+      uses: actions/checkout@v4
+
+    - name: Configurar Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    - name: Instalar Poetry
+      run: pip install poetry
+
+    - name: Instalar dependências
+      run: poetry install
+
+    - name: Executar ruff check
+      run: poetry run ruff check .
+
+    - name: Verificar formatação com ruff
+      run: poetry run ruff format --check .  # --check fails the job on unformatted files instead of rewriting them on the runner
+
+    - name: Verificar formatação com black
+      run: poetry run black --check .
+
+    - name: Executar bandit
+      run: poetry run bandit -r . --skip B101  # NOTE(review): [tool.bandit] in pyproject.toml is only read with "-c pyproject.toml" - confirm intent
diff --git a/.github/workflows/pipeline_test.yml b/.github/workflows/pipeline_test.yml
@@ -0,0 +1,28 @@
+name: Pipeline de Testes
+
+on:
+  pull_request:  # triggered only by pull requests; this workflow has no push trigger
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Verificar código
+      uses: actions/checkout@v4
+
+    - name: Configurar Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    - name: Instalar Poetry
+      run: pip install poetry  # Poetry version is not pinned here
+
+    - name: Instalar dependências
+      run: poetry install
+
+    - name: Executar testes unitários
+      run: poetry run pytest --cov=.  # --cov=. collects coverage for the repository root (pytest-cov)
+
diff --git a/__init__.py b/__init__.py
diff --git a/artifacts/models/svc_model.joblib b/artifacts/models/svc_model.joblib
diff --git a/config/settings.py b/config/settings.py
@@ -11,7 +11,6 @@
 INTERIM_DATA_DIR = DATA_DIR / "interim"
 
 MODELS_DIR = BASE_DIR / "artifacts" / "models"
-
 NOTEBOOKS_DIR = BASE_DIR / "notebooks"
 REPORTS_DIR = BASE_DIR / "reports"
 FIGURES_DIR = REPORTS_DIR / "figures"
@@ -24,3 +23,4 @@
 DOCS_DIR = BASE_DIR / "docs"
 TESTS_DIR = BASE_DIR / "tests"
 UI_DIR = BASE_DIR / "ui"
+TEST_LOAD_DATA_CSV = TESTS_DIR / "test_load_data.csv"  # CSV fixture read by tests/test_features.py
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,13 +11,17 @@ requests = "^2.25.1"
 pandas = "^2.0.0"
 typer = "0.12.5"
 scikit-learn = "1.5.1"
-fastapi = "^0.115.0"
+fastapi = "^0.114.1"
 uvicorn = "^0.30.6"
+joblib = "^1.4.2"
 
 [tool.poetry.dev-dependencies]
 pytest = "^6.2.4"
+pytest-cov = "^5.0.0"
+pytest-mock = "^3.14.0"
 pre-commit = "^3.7.0"
 ruff = "0.6.3"
+black = "^23.1.0"
 bandit = {version = "1.7.7", extras = ["toml"]}
 dvc = {version = "3.55.2", extras = ["s3"]}
 
@@ -32,3 +36,6 @@ exclude_dirs = [
     "__pycache__",
     "venv/",
 ]
+
+[tool.bandit.assert_used]  # per-plugin settings for bandit's assert_used check (B101)
+skips = ['*/test_*.py']  # do not report assert statements found in test modules
diff --git a/src/pipelines/features.py b/src/pipelines/features.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 import typer
 from config.settings import PROCESSED_DATA_DIR, RAW_DATA_DIR
+from typing import Optional
+
 
 app = typer.Typer()
 
@@ -70,7 +72,9 @@ def preprocess_data(df: pd.DataFrame):
 
 @app.command()
 def preprocess(
-    input_file: str = None, output_train_file: str = None, output_test_file: str = None
+    input_file: Optional[str] = None,
+    output_train_file: Optional[str] = None,
+    output_test_file: Optional[str] = None,
 ):
     """
     Função principal para o pré-processamento dos dados. Carrega os dados brutos, realiza
@@ -95,7 +99,11 @@ def preprocess(
     output_train_file.parent.mkdir(parents=True, exist_ok=True)
 
     # Carregar dados
-    df = load_data(input_file)
+    try:
+        df = load_data(input_file)
+    except FileNotFoundError:
+        typer.echo("Falha ao carregar os dados. Arquivo não encontrado.", err=True)
+        raise SystemExit(1)
 
     # Pré-processar dados (dividir em treino/teste e escalonar)
     train_data, test_data = preprocess_data(df)
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/test_features.py b/tests/test_features.py
@@ -0,0 +1,54 @@
+import pytest
+import pandas as pd
+from src.pipelines.features import load_data, preprocess, preprocess_data
+from config.settings import TEST_LOAD_DATA_CSV
+
+
+def test_load_data_success():
+    # Load the two-row CSV fixture through load_data
+    df = load_data(TEST_LOAD_DATA_CSV)
+
+    # Expected frame; NOTE(review): the fixture has no header row, so load_data presumably names the columns - confirm
+    expected_df = pd.DataFrame(
+        {
+            "sepal_length": [5.1, 4.9],
+            "sepal_width": [3.5, 3.0],
+            "petal_length": [1.4, 1.4],
+            "petal_width": [0.2, 0.2],
+            "species": ["setosa", "setosa"],
+        }
+    )
+    # The loaded frame must match the expected one (values, column names and dtypes)
+    pd.testing.assert_frame_equal(df, expected_df)
+
+
+def test_preprocess_data_no_species_column():
+    # Sample rows without the 'species' column; preprocess_data is expected to raise KeyError
+    df = pd.DataFrame(
+        {
+            "sepal_length": [5.1, 4.9, 4.7, 4.6],
+            "sepal_width": [3.5, 3.0, 3.2, 3.1],
+            "petal_length": [1.4, 1.4, 1.3, 1.5],
+            "petal_width": [0.2, 0.2, 0.2, 0.2],
+        }
+    )
+
+    with pytest.raises(KeyError):
+        preprocess_data(df)
+
+
+def test_preprocess_failure(capfd):
+    # Chamar a função preprocess e garantir que uma exceção seja levantada
+    with pytest.raises(SystemExit):
+        preprocess(
+            input_file="fake_input.csv",
+            output_train_file="fake_train.csv",
+            output_test_file="fake_test.csv",
+        )
+
+    # Capturar a saída de erro (stderr)
+    captured = capfd.readouterr()
+    print(captured)
+
+    # Verificar se a mensagem de erro está no stderr
+    assert "Falha ao carregar os dados. Arquivo não encontrado." in captured.err
diff --git a/tests/test_load_data.csv b/tests/test_load_data.csv
@@ -0,0 +1,2 @@
+5.1,3.5,1.4,0.2,setosa
+4.9,3.0,1.4,0.2,setosa
diff --git a/tests/test_train.py b/tests/test_train.py
@@ -0,0 +1,23 @@
+from src.pipelines.train import save_model
+from unittest.mock import MagicMock
+
+
+def test_save_model(mocker):
+    # Stub joblib.dump; NOTE(review): only effective if train.py calls joblib.dump via the module attribute - confirm
+    mock_dump = mocker.patch("joblib.dump")
+
+    # Stub typer.echo so the confirmation message can be inspected
+    mock_echo = mocker.patch("typer.echo")
+
+    # Stand-in model object
+    mock_model = MagicMock()
+
+    # Dummy output path for the model file
+    output_file = "fake_model.joblib"
+
+    # Call save_model with the stand-in model and dummy path
+    save_model(mock_model, output_file)
+
+    # joblib.dump must be called exactly once with (model, path), and the saved path must be echoed once
+    mock_dump.assert_called_once_with(mock_model, output_file)
+    mock_echo.assert_called_once_with(f"Modelo salvo em: {output_file}")

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+5.1,3.5,1.4,0.2,setosa`
	`2`	`+4.9,3.0,1.4,0.2,setosa`