From c1fa49ef7f05214b301017f8777104ddc602ab76 Mon Sep 17 00:00:00 2001
From: Sowndappan S <Sowndappan.s@logesys.com>
Date: Thu, 25 Sep 2025 18:24:33 +0530
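Subject: [PATCH 1/2] decision_tree: simplify train() and fix doctest typo

- Compute np.mean(y) once and merge the two early-return checks
  (too few samples, depth == 1) into a single condition.
- Merge the two min_leaf_size guards in the split loop into one condition
  and drop the now-unneeded else branch.
- Compute the left/right split errors from the label slices y[:i] and
  y[i:] instead of from x[:i] and x[i:] (behaviour change).
- Lower the demo tree depth in main() from 10 to 6.
- Fix the "mean_squarred_error" typo in the name passed to doctest.testmod.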
---
 machine_learning/decision_tree.py | 40 ++++++++++++++++---------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py
index 72970431c3fc..4632cdad4d78 100644
--- a/machine_learning/decision_tree.py
+++ b/machine_learning/decision_tree.py
@@ -87,16 +87,14 @@ def train(self, x, y):
         if y.ndim != 1:
             raise ValueError("Data set labels must be one-dimensional")
 
-        if len(x) < 2 * self.min_leaf_size:
-            self.prediction = np.mean(y)
-            return
+        mean_y = np.mean(y)
 
-        if self.depth == 1:
-            self.prediction = np.mean(y)
+        if len(x) < 2 * self.min_leaf_size or self.depth == 1:
+            self.prediction = mean_y
             return
-
+        
         best_split = 0
-        min_error = self.mean_squared_error(x, np.mean(y)) * 2
+        min_error = self.mean_squared_error(x, mean_y) * 2
 
         """
         loop over all possible splits for the decision tree. find the best split.
@@ -105,17 +103,21 @@ def train(self, x, y):
         the predictor
         """
         for i in range(len(x)):
-            if len(x[:i]) < self.min_leaf_size:  # noqa: SIM114
-                continue
-            elif len(x[i:]) < self.min_leaf_size:
+            if len(x[:i]) < self.min_leaf_size or len(x[i:]) < self.min_leaf_size:
                 continue
-            else:
-                error_left = self.mean_squared_error(x[:i], np.mean(y[:i]))
-                error_right = self.mean_squared_error(x[i:], np.mean(y[i:]))
-                error = error_left + error_right
-                if error < min_error:
-                    best_split = i
-                    min_error = error
+        
+            left_y = y[:i]
+            right_y = y[i:]
+            mean_left = np.mean(left_y)
+            mean_right = np.mean(right_y)
+        
+            error_left = self.mean_squared_error(left_y, mean_left)
+            error_right = self.mean_squared_error(right_y, mean_right)
+            error = error_left + error_right
+        
+            if error < min_error:
+                best_split = i
+                min_error = error
 
         if best_split != 0:
             left_x = x[:best_split]
@@ -184,7 +186,7 @@ def main():
     x = np.arange(-1.0, 1.0, 0.005)
     y = np.sin(x)
 
-    tree = DecisionTree(depth=10, min_leaf_size=10)
+    tree = DecisionTree(depth=6, min_leaf_size=10)
     tree.train(x, y)
 
     rng = np.random.default_rng()
@@ -201,4 +203,4 @@ def main():
     main()
     import doctest
 
-    doctest.testmod(name="mean_squarred_error", verbose=True)
+    doctest.testmod(name="mean_squared_error", verbose=True)

From da7a60b77b21a8e7df07383f17d3645f249864c1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 25 Sep 2025 12:58:36 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/decision_tree.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py
index 4632cdad4d78..c0d967962965 100644
--- a/machine_learning/decision_tree.py
+++ b/machine_learning/decision_tree.py
@@ -92,7 +92,7 @@ def train(self, x, y):
         if len(x) < 2 * self.min_leaf_size or self.depth == 1:
             self.prediction = mean_y
             return
-        
+
         best_split = 0
         min_error = self.mean_squared_error(x, mean_y) * 2
 
@@ -105,16 +105,16 @@ def train(self, x, y):
         for i in range(len(x)):
             if len(x[:i]) < self.min_leaf_size or len(x[i:]) < self.min_leaf_size:
                 continue
-        
+
             left_y = y[:i]
             right_y = y[i:]
             mean_left = np.mean(left_y)
             mean_right = np.mean(right_y)
-        
+
             error_left = self.mean_squared_error(left_y, mean_left)
             error_right = self.mean_squared_error(right_y, mean_right)
             error = error_left + error_right
-        
+
             if error < min_error:
                 best_split = i
                 min_error = error