petrobras · Reinaldo-Kn · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/.gitignore b/.gitignore
@@ -172,3 +172,5 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
+# Temporary files
+temp.py
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+  "editor.formatOnSave": false
+}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -109,4 +109,18 @@ To apply the new logic in the library, it will be necessary to implement the use
 
 ### Additional Features
 
-Preferably, use the `_bibmon_tools.py` file to implement additional features.
+Preferably, use the `_bibmon_tools.py` file to implement additional features.
+
+### Testing New Functionalities
+
+Before adding new functionality, install the testing dependencies. To do this, run the following command:
+
+```bash
+pip install -r test/requirements.txt
+```
+
+After implementing the new functionality, run the test suite to ensure that the new code works correctly. To do this, run the following command:
+
+```bash
+pytest
+```
diff --git a/bibmon/__init__.py b/bibmon/__init__.py
@@ -4,12 +4,12 @@
 from ._sbm import SBM
 from ._sklearn_regressor import sklearnRegressor
 from ._preprocess import PreProcess
-from ._load_data import load_tennessee_eastman, load_real_data
+from ._load_data import load_tennessee_eastman, load_real_data, load_3w
 from ._bibmon_tools import train_val_test_split, complete_analysis, comparative_table, spearmanr_dendrogram, create_df_with_dates, create_df_with_noise, align_dfs_by_rows
 
 __all__ = ['Autoencoder','PCA','ESN','SBM',
 	   'sklearnRegressor', 'PreProcess',
-           'load_tennessee_eastman', 'load_real_data', 
+           'load_tennessee_eastman', 'load_real_data', 'load_3w', 
            'train_val_test_split', 'complete_analysis', 'comparative_table',
 	       'spearmanr_dendrogram', 'create_df_with_dates',
            'create_df_with_noise', 'align_dfs_by_rows']
diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py
@@ -2,6 +2,7 @@
 import pandas as pd
 from datetime import datetime
 import matplotlib.pyplot as plt
+from typing import Literal
 
 ###############################################################################
 
@@ -692,4 +693,86 @@ def comparative_table (models, X_train, X_validation, X_test,
 
         return_tables.append(times_df)
 
-    return return_tables
+    return return_tables
+
+##############################################################################
+
+def find_df_transitions(
+    df: pd.DataFrame,
+    threshold: float = 1,
+    data_type: Literal["string", "number"] = "number",
+    label: str = None,
+) -> list[int]:
+    """
+    Finds transitions in one column of a DataFrame. This can be used to find indices of interesting events in the data.
+
+    Each row is compared with a reference value, which starts as the first row and is only replaced when a transition is found. A slow drift is therefore reported once its accumulated change exceeds the threshold.
+
+    Parameters
+    ----------
+    df: pandas.DataFrame
+        Data to be analyzed.
+    threshold: float, optional
+        Threshold to be used in the transition detection, this is the minimum absolute difference between a point and the reference value. Will be used only if data_type is 'number'.
+    data_type: str, optional
+        Type of data to be analyzed. If 'number', the threshold will be used to detect transitions. If 'string', any value different from the reference value is a transition. Any other value results in no transitions.
+    label: str
+        Name of the column to be analyzed. If None, an empty list is returned.
+
+    Returns
+    ----------
+    : list of ints
+        Positions (as used by .iloc) of the transitions. Empty if label is None or the column has no rows.
+    """
+
+    if label is None:
+        return []
+
+    # Fetch the column once instead of on every iteration.
+    column = df[label]
+
+    # An empty column has no first value to use as reference (.iloc[0] would raise IndexError).
+    if len(column) == 0:
+        return []
+
+    transitions = []
+    previous_event = column.iloc[0]
+
+    for i in range(1, len(column)):
+        current = column.iloc[i]
+
+        if data_type == "number":
+            changed = abs(current - previous_event) > threshold
+        elif data_type == "string":
+            changed = current != previous_event
+        else:
+            # Unknown data types never report a transition.
+            changed = False
+
+        if changed:
+            transitions.append(i)
+            # The reference only moves when a transition is recorded.
+            previous_event = current
+
+    return transitions
+
+###############################################################################
+
+def split_df_percentages(df: pd.DataFrame, percentages: list[float]) -> list[pd.DataFrame]:
+    """
+    Splits a DataFrame into multiple consecutive DataFrames according to the given percentages, the sum of percentages must equal 1 (up to floating-point rounding).
+
+    For example, if percentage = [0.6, 0.2, 0.2], the function will return a list with three DataFrames, the first one with 60% of the data, the second one with 20% and the third one with 20%.
+
+    Warning: This function may cause data loss if the split cannot be done exactly according to the percentages. The size of each part is rounded down, so the last rows of the DataFrame may be left out.
+
+    Parameters
+    ----------
+    df: pandas.DataFrame
+        Data to be split.
+    percentages: list of floats
+        List of percentages to be used in the split.
+
+    Returns
+    ----------
+    : list of pandas.DataFrames
+        List with the split DataFrames, in the same order as the percentages.
+
+    Raises
+    ----------
+    ValueError
+        If the percentages do not add up to 1.
+    """
+
+    import math  # local import, so the function does not rely on the module's import block
+
+    # Compare with a tolerance: float sums such as sum([0.1] * 10) are not exactly 1.
+    if not math.isclose(sum(percentages), 1.0):
+        raise ValueError("The sum of the percentages must be 1.")
+
+    n_rows = len(df)
+    split_dfs = []
+    start = 0
+
+    for percentage in percentages:
+        # The small offset prevents products that are whole numbers mathematically
+        # (e.g. 0.29 * 100 == 28.999999999999996 in floating point) from being truncated one row short.
+        end = start + int(percentage * n_rows + 1e-9)
+        split_dfs.append(df.iloc[start:end])
+        start = end
+
+    return split_dfs