From c1fa49ef7f05214b301017f8777104ddc602ab76 Mon Sep 17 00:00:00 2001 From: Sowndappan S Date: Thu, 25 Sep 2025 18:24:33 +0530 Subject: [PATCH 1/2] Update decision_tree.py --- machine_learning/decision_tree.py | 40 ++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index 72970431c3fc..4632cdad4d78 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -87,16 +87,14 @@ def train(self, x, y): if y.ndim != 1: raise ValueError("Data set labels must be one-dimensional") - if len(x) < 2 * self.min_leaf_size: - self.prediction = np.mean(y) - return + mean_y = np.mean(y) - if self.depth == 1: - self.prediction = np.mean(y) + if len(x) < 2 * self.min_leaf_size or self.depth == 1: + self.prediction = mean_y return - + best_split = 0 - min_error = self.mean_squared_error(x, np.mean(y)) * 2 + min_error = self.mean_squared_error(x, mean_y) * 2 """ loop over all possible splits for the decision tree. find the best split. @@ -105,17 +103,21 @@ def train(self, x, y): the predictor """ for i in range(len(x)): - if len(x[:i]) < self.min_leaf_size: # noqa: SIM114 - continue - elif len(x[i:]) < self.min_leaf_size: + if len(x[:i]) < self.min_leaf_size or len(x[i:]) < self.min_leaf_size: continue - else: - error_left = self.mean_squared_error(x[:i], np.mean(y[:i])) - error_right = self.mean_squared_error(x[i:], np.mean(y[i:])) - error = error_left + error_right - if error < min_error: - best_split = i - min_error = error + + left_y = y[:i] + right_y = y[i:] + mean_left = np.mean(left_y) + mean_right = np.mean(right_y) + + error_left = self.mean_squared_error(left_y, mean_left) + error_right = self.mean_squared_error(right_y, mean_right) + error = error_left + error_right + + if error < min_error: + best_split = i + min_error = error if best_split != 0: left_x = x[:best_split] @@ -184,7 +186,7 @@ def main(): x = np.arange(-1.0, 1.0, 0.005) y = np.sin(x) - tree = DecisionTree(depth=10, min_leaf_size=10) + tree = DecisionTree(depth=6, min_leaf_size=10) tree.train(x, y) rng = np.random.default_rng() @@ -201,4 +203,4 @@ def main(): main() import doctest - doctest.testmod(name="mean_squarred_error", verbose=True) + doctest.testmod(name="mean_squared_error", verbose=True) From da7a60b77b21a8e7df07383f17d3645f249864c1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:58:36 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/decision_tree.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index 4632cdad4d78..c0d967962965 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -92,7 +92,7 @@ def train(self, x, y): if len(x) < 2 * self.min_leaf_size or self.depth == 1: self.prediction = mean_y return - + best_split = 0 min_error = self.mean_squared_error(x, mean_y) * 2 @@ -105,16 +105,16 @@ def train(self, x, y): for i in range(len(x)): if len(x[:i]) < self.min_leaf_size or len(x[i:]) < self.min_leaf_size: continue - + left_y = y[:i] right_y = y[i:] mean_left = np.mean(left_y) mean_right = np.mean(right_y) - + error_left = self.mean_squared_error(left_y, mean_left) error_right = self.mean_squared_error(right_y, mean_right) error = error_left + error_right - + if error < min_error: best_split = i min_error = error