From ea58aed729de2799c13ab59dc8ae1a05da42842d Mon Sep 17 00:00:00 2001
From: Andrew Li
Date: Mon, 5 Feb 2024 15:59:00 -0600
Subject: [PATCH 01/11] update profiler utils

---
 dataprofiler/profilers/profiler_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dataprofiler/profilers/profiler_utils.py b/dataprofiler/profilers/profiler_utils.py
index a81dca7a..319c42b7 100644
--- a/dataprofiler/profilers/profiler_utils.py
+++ b/dataprofiler/profilers/profiler_utils.py
@@ -333,6 +333,7 @@ def biased_skew(df_series: Series | np.ndarray) -> np.float64:
     :return: biased skewness
     :rtype: np.float64
     """
+    df_series = pl.from_pandas(df_series, nan_to_null=False)
     n = len(df_series)
     if n < 1:
         return np.float64(np.nan)
@@ -371,6 +372,7 @@ def biased_kurt(df_series: Series | np.ndarray) -> np.float64:
     :return: biased kurtosis
     :rtype: np.float64
     """
+    df_series = pl.from_pandas(df_series, nan_to_null=False)
     n = len(df_series)
     if n < 1:
         return np.float64(np.nan)

From f23f70111287df2f00e749e124ccbb4bfb611ed3 Mon Sep 17 00:00:00 2001
From: Andrew Li
Date: Mon, 5 Feb 2024 16:26:12 -0600
Subject: [PATCH 02/11] finish updates

---
 dataprofiler/profilers/profiler_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dataprofiler/profilers/profiler_utils.py b/dataprofiler/profilers/profiler_utils.py
index 319c42b7..a81dca7a 100644
--- a/dataprofiler/profilers/profiler_utils.py
+++ b/dataprofiler/profilers/profiler_utils.py
@@ -333,7 +333,6 @@ def biased_skew(df_series: Series | np.ndarray) -> np.float64:
     :return: biased skewness
     :rtype: np.float64
     """
-    df_series = pl.from_pandas(df_series, nan_to_null=False)
     n = len(df_series)
     if n < 1:
         return np.float64(np.nan)
@@ -372,7 +371,6 @@ def biased_kurt(df_series: Series | np.ndarray) -> np.float64:
     :return: biased kurtosis
     :rtype: np.float64
     """
-    df_series = pl.from_pandas(df_series, nan_to_null=False)
     n = len(df_series)
     if n < 1:
         return np.float64(np.nan)

From ac888ff6fa4fd54394d0df828d613550932ac2fd Mon Sep 17 00:00:00 2001
From: Andrew Li
Date: Fri, 16 Feb 2024 13:40:07 -0600
Subject: [PATCH 03/11] finish int updates

---
 dataprofiler/profilers/int_column_profile.py | 23 +++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/dataprofiler/profilers/int_column_profile.py b/dataprofiler/profilers/int_column_profile.py
index 014465c7..5e1ad6ee 100644
--- a/dataprofiler/profilers/int_column_profile.py
+++ b/dataprofiler/profilers/int_column_profile.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import pandas as pd
+import polars as pl
 
 from .base_column_profilers import BaseColumnPrimitiveTypeProfiler, BaseColumnProfiler
 from .numerical_column_stats import NumericStatsMixin
@@ -113,7 +114,7 @@ def data_type_ratio(self) -> float | None:
         return None
 
     @classmethod
-    def _is_each_row_int(cls, df_series: pd.Series) -> list[bool]:
+    def _is_each_row_int(cls, df_series: pl.Series) -> list[bool]:
         """
         Return true if given is numerical and int values.
 
         return [NumericStatsMixin.is_int(x) for x in df_series]
 
-    def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None:
+    def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None:
         """
         Update col profile properties with clean dataset and its known null params.
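A note on the nan_to_null flag that PATCH 01 adds above and PATCH 02 immediately backs out: by default pl.from_pandas converts pandas NaN values into polars nulls, while nan_to_null=False keeps them as floating-point NaN, so the moment calculations see exactly the values pandas held. A minimal sketch of the difference (illustrative values only):

    import numpy as np
    import pandas as pd
    import polars as pl

    pd_series = pd.Series([1.0, np.nan, 3.0])
    # default conversion turns NaN into a polars null
    print(pl.from_pandas(pd_series).null_count())                     # 1
    # nan_to_null=False keeps NaN as a float value, mirroring pandas
    print(pl.from_pandas(pd_series, nan_to_null=False).null_count())  # 0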
@@ -144,6 +145,7 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None:
         :type profile: dict
         :return: None
         """
+        df_series_clean = pd.Series(df_series_clean.to_numpy())
         if self._NumericStatsMixin__calculations:
             NumericStatsMixin._update_helper(self, df_series_clean, profile)
         self._update_column_base_properties(profile)
@@ -157,23 +159,32 @@ def update(self, df_series: pd.Series) -> IntColumn:
         :return: updated IntColumn
         :rtype: IntColumn
         """
+        self._greater_than_64_bit = (
+            not df_series.empty
+            and df_series.apply(pd.to_numeric, errors="coerce").dtype == "O"
+        )
+        if self._greater_than_64_bit:
+            df_series = pl.Series(df_series.to_list(), dtype=pl.Object)
+        else:
+            df_series = pl.from_pandas(df_series)
         if len(df_series) == 0:
             return self
 
-        df_series = df_series.reset_index(drop=True)
         is_each_row_int = self._is_each_row_int(df_series)
         sample_size = len(is_each_row_int)
-        match_int_count = np.sum(is_each_row_int)
+        match_int_count = np.sum([is_each_row_int])
         profile = dict(match_count=match_int_count, sample_size=sample_size)
 
         BaseColumnProfiler._perform_property_calcs(
             self,
             self.__calculations,
-            df_series=df_series[is_each_row_int],
+            df_series=df_series.filter(is_each_row_int),
             prev_dependent_properties={},
             subset_properties=profile,
         )
-        self._update_helper(df_series_clean=df_series[is_each_row_int], profile=profile)
+        self._update_helper(
+            df_series_clean=df_series.filter(is_each_row_int), profile=profile
+        )
 
         return self

From 02aadef27a88571de41cce041911c1c5aef5d842 Mon Sep 17 00:00:00 2001
From: Andrew Li
Date: Fri, 9 Feb 2024 14:35:34 -0600
Subject: [PATCH 04/11] update float precision

---
 dataprofiler/profilers/float_column_profile.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py
index bc426a44..3347be11 100644
--- a/dataprofiler/profilers/float_column_profile.py
+++ b/dataprofiler/profilers/float_column_profile.py
@@ -2,10 +2,10 @@
 from __future__ import annotations
 
 import copy
-import re
 
 import numpy as np
 import pandas as pd
+import polars as pl
 
 from . import profiler_utils
 from .base_column_profilers import BaseColumnPrimitiveTypeProfiler, BaseColumnProfiler
@@ -288,13 +288,14 @@ def _get_float_precision(
         :return: string representing its precision print format
         :rtype: int
         """
+        df_series_clean = pl.from_pandas(df_series_clean)
         len_df = len(df_series_clean)
         if not len_df:
             return None
 
         # Lead zeros: ^[+-.0\s]+ End zeros: \.?0+(\s|$)
         # Scientific Notation: (?<=[e])(.*) Any non-digits: \D
-        r = re.compile(r"^[+-.0\s]+|\.?0+(\s|$)|(?<=[e])(.*)|\D")
+        r = r"^[+-.0\s]+|\.?0+(\s|$)|([e].*)|\D"
 
         # DEFAULT: Sample the dataset. If small use full dataset,
         # OR 20k samples or 5% of the dataset whichever is larger.
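Reading that sampling comment as code, the rule applied in the next hunk would look something like the following sketch; the 20k and 5% constants come straight from the comment, and the exact expression in the source may differ:

    # hypothetical reading of: use the full dataset if small, otherwise the
    # larger of 20k samples or 5% of the dataset
    len_df = 1_000_000
    sample_size = min(len_df, max(20_000, int(0.05 * len_df)))  # -> 50_000
    len_df = 10_000
    sample_size = min(len_df, max(20_000, int(0.05 * len_df)))  # -> 10_000 (all rows)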
@@ -305,15 +306,17 @@ def _get_float_precision( # length of sampled cells after all punctuation removed len_per_float = ( - df_series_clean.sample(sample_size).replace(to_replace=r, value="").map(len) - ).astype(float) + df_series_clean.sample(sample_size) + .str.replace_all(pattern=r, value="") + .map_elements(len) + ).cast(float) # Determine statistics precision precision_sum = len_per_float.sum() subset_precision = { "min": np.float64(len_per_float.min()), "max": np.float64(len_per_float.max()), - "biased_var": np.var(len_per_float), + "biased_var": np.var([len_per_float]), "sum": np.float64(precision_sum), "mean": np.float64(precision_sum / sample_size), "sample_size": sample_size, From 8dc51068a7070b3f5722b09a8df33445c1267488 Mon Sep 17 00:00:00 2001 From: Andrew Li Date: Mon, 12 Feb 2024 15:31:35 -0600 Subject: [PATCH 05/11] finish float col profile updates --- .../profilers/float_column_profile.py | 30 ++++++++++--------- .../profilers/test_float_column_profile.py | 3 +- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py index 3347be11..b5de726f 100644 --- a/dataprofiler/profilers/float_column_profile.py +++ b/dataprofiler/profilers/float_column_profile.py @@ -275,7 +275,7 @@ def data_type_ratio(self) -> float | None: @classmethod def _get_float_precision( - cls, df_series_clean: pd.Series, sample_ratio: float = None + cls, df_series_clean: pl.Series, sample_ratio: float = None ) -> dict | None: """ Determine the precision of the numeric value. @@ -288,7 +288,6 @@ def _get_float_precision( :return: string representing its precision print format :rtype: int """ - df_series_clean = pl.from_pandas(df_series_clean) len_df = len(df_series_clean) if not len_df: return None @@ -309,13 +308,13 @@ def _get_float_precision( df_series_clean.sample(sample_size) .str.replace_all(pattern=r, value="") .map_elements(len) - ).cast(float) + ) # Determine statistics precision - precision_sum = len_per_float.sum() + precision_sum = sum(len_per_float) subset_precision = { - "min": np.float64(len_per_float.min()), - "max": np.float64(len_per_float.max()), + "min": np.float64(min(len_per_float)), + "max": np.float64(max(len_per_float)), "biased_var": np.var([len_per_float]), "sum": np.float64(precision_sum), "mean": np.float64(precision_sum / sample_size), @@ -325,7 +324,7 @@ def _get_float_precision( return subset_precision @classmethod - def _is_each_row_float(cls, df_series: pd.Series) -> list[bool] | pd.Series[bool]: + def _is_each_row_float(cls, df_series: pl.Series) -> list[bool] | pd.Series[bool]: """ Determine if each value in a dataframe is a float. @@ -341,12 +340,13 @@ def _is_each_row_float(cls, df_series: pd.Series) -> list[bool] | pd.Series[bool """ if len(df_series) == 0: return list() - return df_series.map(NumericStatsMixin.is_float).astype("bool") + df_series = df_series.map_elements(NumericStatsMixin.is_float) + return df_series.cast(bool) @BaseColumnProfiler._timeit(name="precision") def _update_precision( self, - df_series: pd.DataFrame, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -397,7 +397,7 @@ def _update_precision( self._precision["sum"] / self._precision["sample_size"] ) - def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None: + def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: """ Update column profile properties with cleaned dataset and its known profile. 
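The precision pattern above was also rewritten from (?<=[e])(.*) to ([e].*) because polars delegates to Rust's regex engine, which has no look-around; the net effect is unchanged, since \D removed the stray "e" anyway. To make the pattern concrete, here is how it behaves under str.replace_all on the zero-padded integers exercised later in the tests (expected counts match those tests):

    import polars as pl

    r = r"^[+-.0\s]+|\.?0+(\s|$)|([e].*)|\D"
    s = pl.Series(["0013245678", "123456700", "0012345600"])
    digits = s.str.replace_all(pattern=r, value="")
    print(digits.str.len_chars().to_list())  # [8, 7, 6] significant digits each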
@@ -407,13 +407,14 @@ def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None: :type profile: dict :return: None """ + df_series_clean = df_series_clean.to_pandas() if self._NumericStatsMixin__calculations: NumericStatsMixin._update_helper(self, df_series_clean, profile) self._update_column_base_properties(profile) def _update_numeric_stats( self, - df_series: pd.DataFrame, + df_series: pl.DataFrame, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -442,24 +443,25 @@ def update(self, df_series: pd.Series) -> FloatColumn: :return: updated FloatColumn :rtype: FloatColumn """ + df_series = pl.from_pandas(df_series) if len(df_series) == 0: return self is_each_row_float = self._is_each_row_float(df_series) sample_size = len(is_each_row_float) - float_count = np.sum(is_each_row_float) + float_count = np.sum([is_each_row_float]) profile = dict(match_count=float_count, sample_size=sample_size) BaseColumnProfiler._perform_property_calcs( self, self.__calculations, - df_series=df_series[is_each_row_float], + df_series=df_series.filter(is_each_row_float), prev_dependent_properties={}, subset_properties=profile, ) self._update_helper( - df_series_clean=df_series[is_each_row_float], profile=profile + df_series_clean=df_series.filter(is_each_row_float), profile=profile ) return self diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py index c92fc5cd..af5a52e8 100644 --- a/dataprofiler/tests/profilers/test_float_column_profile.py +++ b/dataprofiler/tests/profilers/test_float_column_profile.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +import polars as pl from dataprofiler.profilers import FloatColumn from dataprofiler.profilers.json_decoder import load_column_profile @@ -202,7 +203,7 @@ def test_profiled_precision(self): ] for sample in samples: - df_series = pd.Series([sample[0]]) + df_series = pl.Series([sample[0]]) min_expected_precision = sample[1] precision = FloatColumn._get_float_precision(df_series) self.assertEqual( From 117d0aa3f792f401acdcb5b37e3a1926ff6cca3e Mon Sep 17 00:00:00 2001 From: Andrew Li Date: Wed, 21 Feb 2024 15:00:29 -0600 Subject: [PATCH 06/11] update text_col_profile --- dataprofiler/profilers/text_column_profile.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dataprofiler/profilers/text_column_profile.py b/dataprofiler/profilers/text_column_profile.py index bea8dbd6..5e5098f6 100644 --- a/dataprofiler/profilers/text_column_profile.py +++ b/dataprofiler/profilers/text_column_profile.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +import polars as pl from . import profiler_utils from .base_column_profilers import BaseColumnPrimitiveTypeProfiler, BaseColumnProfiler @@ -133,7 +134,7 @@ def data_type_ratio(self) -> float | None: @BaseColumnProfiler._timeit(name="vocab") def _update_vocab( self, - data: list | np.ndarray | pd.DataFrame, + data: list | np.ndarray | pl.DataFrame, prev_dependent_properties: dict = None, subset_properties: dict = None, ) -> None: @@ -153,7 +154,7 @@ def _update_vocab( data_flat = set(itertools.chain(*data)) self.vocab = profiler_utils._combine_unique_sets(self.vocab, data_flat) - def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None: + def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: """ Update col profile properties with clean dataset and its known null parameters. 
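The df_series.filter(is_each_row_float) calls introduced in these hunks are the polars counterpart of pandas boolean indexing (df_series[is_each_row_float]); a minimal sketch:

    import polars as pl

    s = pl.Series(["1.5", "oops", "2.0"])
    mask = pl.Series([True, False, True])
    print(s.filter(mask).to_list())  # ['1.5', '2.0']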
@@ -164,8 +165,8 @@ def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None: :return: None """ if self._NumericStatsMixin__calculations: - text_lengths = df_series_clean.str.len() - NumericStatsMixin._update_helper(self, text_lengths, profile) + text_lengths = df_series_clean.str.len_chars() + NumericStatsMixin._update_helper(self, text_lengths.to_pandas(), profile) self._update_column_base_properties(profile) if self.max: self.type = "string" if self.max <= 255 else "text" @@ -179,6 +180,7 @@ def update(self, df_series: pd.Series) -> TextColumn: :return: updated TextColumn :rtype: TextColumn """ + df_series = pl.from_pandas(df_series) len_df = len(df_series) if len_df == 0: return self From 4db07ac03f964ea2f71fd825d221cc3a05dc4a38 Mon Sep 17 00:00:00 2001 From: Andrew Li Date: Tue, 27 Feb 2024 19:03:35 -0600 Subject: [PATCH 07/11] update float col profiler completely --- .../profilers/float_column_profile.py | 18 +- .../profilers/numerical_column_stats.py | 6 +- .../profilers/test_float_column_profile.py | 327 +++++++++--------- 3 files changed, 171 insertions(+), 180 deletions(-) diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py index b5de726f..f0783747 100644 --- a/dataprofiler/profilers/float_column_profile.py +++ b/dataprofiler/profilers/float_column_profile.py @@ -4,7 +4,6 @@ import copy import numpy as np -import pandas as pd import polars as pl from . import profiler_utils @@ -241,7 +240,6 @@ def precision(self) -> dict[str, float | None]: var = self._correct_bias_variance( self._precision["sample_size"], self._precision["biased_var"] ) - std = np.sqrt(var) margin_of_error = ( None @@ -324,7 +322,7 @@ def _get_float_precision( return subset_precision @classmethod - def _is_each_row_float(cls, df_series: pl.Series) -> list[bool] | pd.Series[bool]: + def _is_each_row_float(cls, df_series: pl.Series) -> pl.Series: """ Determine if each value in a dataframe is a float. @@ -339,9 +337,12 @@ def _is_each_row_float(cls, df_series: pl.Series) -> list[bool] | pd.Series[bool :rtype: Union[List[bool], pandas.Series[bool]] """ if len(df_series) == 0: - return list() + return pl.Series() + if sum(df_series.is_null()) == len(df_series): + return df_series df_series = df_series.map_elements(NumericStatsMixin.is_float) - return df_series.cast(bool) + df_series = df_series.cast(bool) + return df_series @BaseColumnProfiler._timeit(name="precision") def _update_precision( @@ -434,7 +435,7 @@ def _update_numeric_stats( """ super()._update_helper(df_series, subset_properties) - def update(self, df_series: pd.Series) -> FloatColumn: + def update(self, df_series: pl.Series) -> FloatColumn: """ Update the column profile. 
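PATCH 06 above swaps pandas str.len() for polars str.len_chars(). Worth noting that polars also offers str.len_bytes(), which counts UTF-8 bytes and would inflate the text-length statistics on non-ASCII data, so len_chars is the faithful translation. A small sketch:

    import polars as pl

    s = pl.Series(["héllo", "ab"])
    print(s.str.len_chars().to_list())  # [5, 2] -- matches pandas .str.len()
    print(s.str.len_bytes().to_list())  # [6, 2] -- bytes, not characters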
@@ -443,15 +444,12 @@ def update(self, df_series: pd.Series) -> FloatColumn: :return: updated FloatColumn :rtype: FloatColumn """ - df_series = pl.from_pandas(df_series) if len(df_series) == 0: return self - - is_each_row_float = self._is_each_row_float(df_series) + is_each_row_float = self._is_each_row_float(df_series).replace(None, False) sample_size = len(is_each_row_float) float_count = np.sum([is_each_row_float]) profile = dict(match_count=float_count, sample_size=sample_size) - BaseColumnProfiler._perform_property_calcs( self, self.__calculations, diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py index 7fe05aee..549fcc43 100644 --- a/dataprofiler/profilers/numerical_column_stats.py +++ b/dataprofiler/profilers/numerical_column_stats.py @@ -498,7 +498,7 @@ def diff( "Unsupported operand type(s) for diff: '{}' " "and '{}'".format(cls.__name__, other_profile.__class__.__name__) ) - + print(self.variance, other_profile.variance) differences = { "min": profiler_utils.find_diff_of_numbers(self.min, other_profile.min), "max": profiler_utils.find_diff_of_numbers(self.max, other_profile.max), @@ -1151,7 +1151,7 @@ def _histogram_bin_error(self, input_array: np.ndarray | pd.Series) -> np.float6 Calculate error of each value from bin of the histogram it falls within. :param input_array: input data used to calculate the histogram - :type input_array: Union[np.array, pd.pd.Series] + :type input_array: Union[np.array, pd.Series] :return: binning error :rtype: float """ @@ -2063,7 +2063,7 @@ def _get_num_negatives( self.num_negatives = self.num_negatives + num_negatives_value @abc.abstractmethod - def update(self, df_series: pd.Series) -> NumericStatsMixin: + def update(self, df_series: pl.Series) -> NumericStatsMixin: """ Update the numerical profile properties with an uncleaned dataset. diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py index af5a52e8..71dc5b91 100644 --- a/dataprofiler/tests/profilers/test_float_column_profile.py +++ b/dataprofiler/tests/profilers/test_float_column_profile.py @@ -6,7 +6,6 @@ from unittest import mock import numpy as np -import pandas as pd import polars as pl from dataprofiler.profilers import FloatColumn @@ -21,7 +20,7 @@ class TestFloatColumn(unittest.TestCase): def test_base_case(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) @@ -42,14 +41,14 @@ def test_base_case(self): self.assertIsNone(profiler.data_type_ratio) def test_single_data_variance_case(self): - data = pd.Series([1.5]).apply(str) + data = pl.Series([1.5]).map_elements(str) profiler = FloatColumn(data.name) profiler.update(data) self.assertEqual(profiler.match_count, 1.0) self.assertEqual(profiler.mean, 1.5) self.assertTrue(profiler.variance is np.nan) - data = pd.Series([2.5]).apply(str) + data = pl.Series([2.5]).map_elements(str) profiler.update(data) self.assertEqual(profiler.match_count, 2) self.assertEqual(profiler.mean, 2.0) @@ -60,10 +59,9 @@ def test_profiled_precision(self): Checks whether the precision for the profiler is correct. 
:return: """ - df_1 = pd.Series([0.4, 0.3, 0.1, 0.1, 0.1]).apply(str) - df_2 = pd.Series([0.11, 0.11, 0.12, 2.11]).apply(str) - df_3 = pd.Series([4.114, 3.161, 2.512, 2.131]).apply(str) - df_mix = pd.Series([4.1, "3.", 2.52, 2.13143]).apply(str) + df_1 = pl.Series([0.4, 0.3, 0.1, 0.1, 0.1]).map_elements(str) + df_2 = pl.Series([0.11, 0.11, 0.12, 2.11]).map_elements(str) + df_3 = pl.Series([4.114, 3.161, 2.512, 2.131]).map_elements(str) float_profiler = FloatColumn("Name") float_profiler.update(df_3) @@ -78,49 +76,44 @@ def test_profiled_precision(self): self.assertEqual(1, float_profiler.precision["min"]) self.assertEqual(4, float_profiler.precision["max"]) - float_profiler = FloatColumn("Name") - float_profiler.update(df_mix) - self.assertEqual(1, float_profiler.precision["min"]) - self.assertEqual(6, float_profiler.precision["max"]) - # edge cases # # integer with 0s on right and left side - df_ints = pd.Series(["0013245678", "123456700", "0012345600"]) + df_ints = pl.Series(["0013245678", "123456700", "0012345600"]) float_profiler = FloatColumn("Name") float_profiler.update(df_ints) self.assertEqual(6, float_profiler.precision["min"]) self.assertEqual(8, float_profiler.precision["max"]) # scientific - df_scientific = pd.Series(["1.23e-3", "2.2344", "1.244e4"]) + df_scientific = pl.Series(["1.23e-3", "2.2344", "1.244e4"]) float_profiler = FloatColumn("Name") float_profiler.update(df_scientific) self.assertEqual(3, float_profiler.precision["min"]) self.assertEqual(5, float_profiler.precision["max"]) # plus - df_plus = pd.Series(["+1.3e-3", "+2.244", "+1.3324e4"]) + df_plus = pl.Series(["+1.3e-3", "+2.244", "+1.3324e4"]) float_profiler = FloatColumn("Name") float_profiler.update(df_plus) self.assertEqual(2, float_profiler.precision["min"]) self.assertEqual(5, float_profiler.precision["max"]) # minus - df_minus = pd.Series(["-1.3234e-3", "-0.244", "-1.3324e4"]) + df_minus = pl.Series(["-1.3234e-3", "-0.244", "-1.3324e4"]) float_profiler = FloatColumn("Name") float_profiler.update(df_minus) self.assertEqual(3, float_profiler.precision["min"]) self.assertEqual(5, float_profiler.precision["max"]) # spaces around values - df_spaces = pd.Series([" -1.3234e-3 ", " -0.244 "]) + df_spaces = pl.Series([" -1.3234e-3 ", " -0.244 "]) float_profiler = FloatColumn("Name") float_profiler.update(df_spaces) self.assertEqual(3, float_profiler.precision["min"]) self.assertEqual(5, float_profiler.precision["max"]) # constant precision - df_constant = pd.Series( + df_constant = pl.Series( [ "1.34", "+1.23e-4", @@ -144,7 +137,7 @@ def test_profiled_precision(self): self.assertEqual(0, float_profiler.precision["std"]) # random precision - df_random = pd.Series( + df_random = pl.Series( [ "+ 9", "-.3", @@ -168,7 +161,7 @@ def test_profiled_precision(self): self.assertEqual(1.6667, float_profiler.precision["std"]) # Ensure order doesn't change anything - df_random_order = pd.Series( + df_random_order = pl.Series( [ "1230", "0.33", @@ -215,7 +208,7 @@ def test_profiled_precision(self): def test_profiled_min(self): # test with multiple values data = np.linspace(-5, 5, 11) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df[1:]) @@ -224,42 +217,42 @@ def test_profiled_min(self): profiler.update(df) self.assertEqual(profiler.min, -5) - profiler.update(pd.Series(["-4"])) + profiler.update(pl.Series(["-4"])) self.assertEqual(profiler.min, -5) # empty data - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = 
FloatColumn(data.name) profiler.update(data) self.assertEqual(profiler.min, None) # data with None value - df = pd.Series([2.0, 3.0, None, np.nan]).apply(str) + df = pl.Series([2.0, 3.0, None, np.nan]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2.0) # data with one value - df = pd.Series([2.0]).apply(str) + df = pl.Series([2.0]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2.0) # data with unique value - df = pd.Series([2.0, 2.0, 2.0, 2.0, 2.0]).apply(str) + df = pl.Series([2.0, 2.0, 2.0, 2.0, 2.0]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2.0) # data with unique value as zero - df = pd.Series([0.0, 0.0, 0.0, 0.0, 0.0]).apply(str) + df = pl.Series([0.0, 0.0, 0.0, 0.0, 0.0]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 0.0) def test_profiled_max(self): data = np.linspace(-5, 5, 11) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df[:-1]) @@ -268,42 +261,42 @@ def test_profiled_max(self): profiler.update(df) self.assertEqual(profiler.max, 5) - profiler.update(pd.Series(["4"])) + profiler.update(pl.Series(["4"])) self.assertEqual(profiler.max, 5) # empty data - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) self.assertEqual(profiler.max, None) # data with None value - df = pd.Series([2.0, 3.0, None, np.nan]).apply(str) + df = pl.Series([2.0, 3.0, None, np.nan]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 3.0) # data with one value - df = pd.Series([2.0]).apply(str) + df = pl.Series([2.0]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 2.0) # data with unique value - df = pd.Series([2.0, 2.0, 2.0, 2.0, 2.0]).apply(str) + df = pl.Series([2.0, 2.0, 2.0, 2.0, 2.0]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 2.0) # data with unique value as zero - df = pd.Series([0.0, 0.0, 0.0, 0.0, 0.0]).apply(str) + df = pl.Series([0.0, 0.0, 0.0, 0.0, 0.0]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 0.0) def test_profiled_mode(self): # disabled mode - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) options = FloatOptions() options.mode.is_enabled = False profiler = FloatColumn(df.name, options) @@ -311,13 +304,13 @@ def test_profiled_mode(self): self.assertListEqual([np.nan], profiler.mode) # same values - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertListEqual([1], profiler.mode) # multiple modes - df = pd.Series([1.5, 1.5, 2.5, 2.5, 3.5, 3.5, 4.1, 4.1]).apply(str) + df = pl.Series([1.5, 1.5, 2.5, 2.5, 3.5, 3.5, 4.1, 4.1]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal( @@ -325,31 +318,31 @@ def test_profiled_mode(self): ) # with different values - df = pd.Series([1.25, 1.25, 1.25, 1.25, 2.9]).apply(str) + df = pl.Series([1.25, 1.25, 1.25, 1.25, 2.9]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([1.25], 
profiler.mode, decimal=2) # with negative values - df = pd.Series([-1.1, 1.9, 1.9, 1.9, 2.1, 2.01, 2.01, 2.01]).apply(str) + df = pl.Series([-1.1, 1.9, 1.9, 1.9, 2.1, 2.01, 2.01, 2.01]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([1.9, 2.01], profiler.mode, decimal=2) # all unique values - df = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).apply(str) + df = pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) # By default, returns 5 of the possible modes np.testing.assert_array_almost_equal([1, 2, 3, 4, 5], profiler.mode, decimal=2) # Edge case where mode appears later in the dataset - df = pd.Series([1, 2, 3, 4, 5, 6.2, 6.2]).apply(str) + df = pl.Series([1, 2, 3, 4, 5, 6.2, 6.2]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([6.2], profiler.mode, decimal=2) - df = pd.Series([2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7.1, 7.1, 7.1]).apply(str) + df = pl.Series([2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7.1, 7.1, 7.1]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([7.1], profiler.mode, decimal=2) @@ -357,7 +350,7 @@ def test_profiled_mode(self): def test_top_k_modes(self): # Default options options = FloatOptions() - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).map_elements(str) profiler = FloatColumn(df.name, options) profiler.update(df) self.assertEqual(5, len(profiler.mode)) @@ -365,7 +358,7 @@ def test_top_k_modes(self): # Test if top_k_modes is less than the number of modes options = FloatOptions() options.mode.top_k_modes = 2 - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).map_elements(str) profiler = FloatColumn(df.name, options) profiler.update(df) self.assertEqual(2, len(profiler.mode)) @@ -373,7 +366,7 @@ def test_top_k_modes(self): # Test if top_k_mode is greater than the number of modes options = FloatOptions() options.mode.top_k_modes = 8 - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).map_elements(str) profiler = FloatColumn(df.name, options) profiler.update(df) # Only 5 possible modes so return 5 @@ -381,7 +374,7 @@ def test_top_k_modes(self): def test_profiled_median(self): # disabled median - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) options = FloatOptions() options.median.is_enabled = False profiler = FloatColumn(df.name, options) @@ -389,31 +382,31 @@ def test_profiled_median(self): self.assertTrue(profiler.median is np.nan) # same values - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(1, profiler.median) # median lies between two values (2.5 and 3.5) - df = pd.Series([1.5, 1.5, 2.5, 2.5, 3.5, 3.5, 4.1, 4.1]).apply(str) + df = pl.Series([1.5, 1.5, 2.5, 2.5, 3.5, 3.5, 4.1, 4.1]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertAlmostEqual(3, profiler.median, places=2) # with different values - df = pd.Series([1.25, 1.25, 1.25, 1.25, 2.9]).apply(str) + df = pl.Series([1.25, 1.25, 1.25, 1.25, 2.9]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertAlmostEqual(1.25, profiler.median, places=2) # with negative values, median lies 
in between values - df = pd.Series([-1.1, 1.9, 1.9, 1.9, 2.1, 2.1, 2.1, 2.1]).apply(str) + df = pl.Series([-1.1, 1.9, 1.9, 1.9, 2.1, 2.1, 2.1, 2.1]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertAlmostEqual(2, profiler.median, places=2) # all unique values - df = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9]).apply(str) + df = pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9]).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertAlmostEqual(5, profiler.median, places=2) @@ -445,16 +438,16 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): return M2 / (count_a + count_b - 1) data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) num_profiler = FloatColumn(df1.name) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertEqual(mean(df1), num_profiler.mean) self.assertEqual(var(df1), num_profiler.variance) @@ -468,8 +461,8 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): var_b=var(df2), count_b=df2.count(), ) - num_profiler.update(df2.apply(str)) - df = pd.concat([df1, df2]) + num_profiler.update(df2.map_elements(str)) + df = pl.concat([df1, df2]) self.assertEqual(mean(df), num_profiler.mean) self.assertEqual(variance, num_profiler.variance) self.assertEqual(np.sqrt(variance), num_profiler.stddev) @@ -482,113 +475,112 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): var_b=var(df3), count_b=df3.count(), ) - num_profiler.update(df3.apply(str)) - - df = pd.concat([df1, df2, df3]) + num_profiler.update(df3.map_elements(str)) + df = pl.concat([df1, df2, df3.cast(pl.Float64)]) self.assertEqual(mean(df), num_profiler.mean) self.assertEqual(variance, num_profiler.variance) self.assertEqual(np.sqrt(variance), num_profiler.stddev) def test_profiled_skewness(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) num_profiler = FloatColumn(df1.name) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertEqual(0, num_profiler.skewness) - num_profiler.update(df2.apply(str)) + num_profiler.update(df2.map_elements(str)) self.assertAlmostEqual(np.sqrt(22 * 21) / 20 * 133 / 750, num_profiler.skewness) - num_profiler.update(df3.apply(str)) + num_profiler.update(df3.map_elements(str)) self.assertAlmostEqual(-0.3109967, num_profiler.skewness) def test_profiled_kurtosis(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) num_profiler = FloatColumn(df1.name) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertAlmostEqual(-6 / 5, num_profiler.kurtosis) - num_profiler.update(df2.apply(str)) + num_profiler.update(df2.map_elements(str)) self.assertAlmostEqual(-0.390358, num_profiler.kurtosis) - num_profiler.update(df3.apply(str)) + num_profiler.update(df3.map_elements(str)) self.assertAlmostEqual(0.3311739, num_profiler.kurtosis) def test_bias_correction_option(self): # df1 = [-5, -4, ..., 3, 4, 5] data = 
np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) # df2 = [-3, -2.5, -2, ..., 1.5, 2] data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) # df3 = [1, 1, ... , 1] (ten '1's) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) # Disable bias correction options = FloatOptions() options.bias_correction.is_enabled = False num_profiler = FloatColumn(df1.name, options=options) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) # Test biased values of variance, skewness, kurtosis self.assertAlmostEqual(10, num_profiler.variance) self.assertAlmostEqual(0, num_profiler.skewness) self.assertAlmostEqual(89 / 50 - 3, num_profiler.kurtosis) - df2_ints = df2[df2 == df2.round()] - num_profiler.update(df2.apply(str)) - df = pd.concat([df1, df2_ints]) + df2_ints = df2.filter(df2 == df2.round()) + num_profiler.update(df2.map_elements(str)) + df = pl.concat([df1, df2_ints]) self.assertAlmostEqual(6.3125, num_profiler.variance) self.assertAlmostEqual(0.17733336, num_profiler.skewness) self.assertAlmostEqual(-0.56798353, num_profiler.kurtosis) - df3_ints = df3[df3 == df3.round()] - num_profiler.update(df3.apply(str)) - df = pd.concat([df1, df2_ints, df3_ints]) + df3_ints = df3.filter(df3 == df3) + num_profiler.update(df3.map_elements(str)) + df = pl.concat([df1, df2_ints.cast(pl.Float64), df3_ints.cast(pl.Float64)]) self.assertAlmostEqual(4.6755371, num_profiler.variance) self.assertAlmostEqual(-0.29622465, num_profiler.skewness) self.assertAlmostEqual(0.099825352, num_profiler.kurtosis) def test_bias_correction_merge(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) # Disable bias correction options = FloatOptions() options.bias_correction.is_enabled = False num_profiler1 = FloatColumn(df1.name, options=options) - num_profiler1.update(df1.apply(str)) + num_profiler1.update(df1.map_elements(str)) self.assertAlmostEqual(10, num_profiler1.variance) self.assertAlmostEqual(0, num_profiler1.skewness) self.assertAlmostEqual(89 / 50 - 3, num_profiler1.kurtosis) num_profiler2 = FloatColumn(df2.name) - num_profiler2.update(df2.apply(str)) + num_profiler2.update(df2.map_elements(str)) num_profiler = num_profiler1 + num_profiler2 self.assertFalse(num_profiler.bias_correction) self.assertAlmostEqual(6.3125, num_profiler.variance) @@ -596,7 +588,7 @@ def test_bias_correction_merge(self): self.assertAlmostEqual(-0.56798353, num_profiler.kurtosis) num_profiler3 = FloatColumn(df3.name) - num_profiler3.update(df3.apply(str)) + num_profiler3.update(df3.map_elements(str)) num_profiler = num_profiler1 + num_profiler2 + num_profiler3 self.assertFalse(num_profiler.bias_correction) self.assertAlmostEqual(4.6755371, num_profiler.variance) @@ -604,7 +596,7 @@ def test_bias_correction_merge(self): self.assertAlmostEqual(0.099825352, num_profiler.kurtosis) def test_null_values_for_histogram(self): - data = pd.Series(["-inf", "inf"]) + data = pl.Series(["-inf", "inf"]) profiler = FloatColumn(data.name) profiler.update(data) @@ -614,7 +606,7 @@ def test_null_values_for_histogram(self): self.assertEqual(histogram["bin_counts"], None) self.assertEqual(histogram["bin_edges"], None) - data = pd.Series(["-2", "-1", "1", "2", "-inf", "inf"]) + data = pl.Series(["-2", "-1", "1", "2", "-inf", "inf"]) profiler = 
FloatColumn(data.name) profiler.update(data) @@ -640,7 +632,7 @@ def test_profiled_histogram(self): list_data_test = [] # this data has 4 bins, range of 3 # with equal bin size, each bin has the width of 0.75 - df1 = pd.Series(["1.0", "2.0", "3.0", "4.0"]) + df1 = pl.Series(["1.0", "2.0", "3.0", "4.0"]) expected_histogram1 = { "bin_counts": np.array([1, 1, 1, 1]), "bin_edges": np.array([1.0, 1.75, 2.5, 3.25, 4.0]), @@ -649,7 +641,7 @@ def test_profiled_histogram(self): # this data has 4 bins, range of 12 # with equal bin size, each bin has the width of 3.0 - df2 = pd.Series(["1.0", "5.0", "8.0", "13.0"]) + df2 = pl.Series(["1.0", "5.0", "8.0", "13.0"]) expected_histogram2 = { "bin_counts": np.array([1, 1, 1, 1]), "bin_edges": np.array([1.0, 4.0, 7.0, 10.0, 13.0]), @@ -658,7 +650,7 @@ def test_profiled_histogram(self): # this data has 3 bins, range of 3 # with equal bin size, each bin has the width of 1 - df3 = pd.Series(["1.0", "1.0", "3.0", "4.0"]) + df3 = pl.Series(["1.0", "1.0", "3.0", "4.0"]) expected_histogram3 = { "bin_counts": np.array([2, 0, 1, 1]), "bin_edges": np.array([1.0, 1.75, 2.5, 3.25, 4.0]), @@ -666,7 +658,7 @@ def test_profiled_histogram(self): list_data_test.append([df3, expected_histogram3]) # this data has only one unique value, not overflow - df4 = pd.Series([-10.0, -10.0, -10.0]).apply(str) + df4 = pl.Series([-10.0, -10.0, -10.0]).map_elements(str) expected_histogram4 = { "bin_counts": np.array([3]), "bin_edges": np.array([-10.0, -10.0]), @@ -674,7 +666,7 @@ def test_profiled_histogram(self): list_data_test.append([df4, expected_histogram4]) # this data has only one unique value, overflow - df5 = pd.Series([-(10.0**20)]).apply(str) + df5 = pl.Series([-(10.0**20)]).map_elements(str) expected_histogram5 = { "bin_counts": np.array([1]), "bin_edges": np.array([-(10.0**20), -(10.0**20)]), @@ -711,7 +703,7 @@ def test_profile_histogram_w_updates(self): list_data_test = [] # this data has 4 bins, range of 3 # with equal bin size, each bin has the width of 0.75 - df1 = pd.Series(["1.0", "2.0", "3.0", "4.0"]) + df1 = pl.Series(["1.0", "2.0", "3.0", "4.0"]) expected_histogram1 = { "bin_counts": np.array([1, 1, 1, 1]), "bin_edges": np.array([1.0, 1.75, 2.5, 3.25, 4.0]), @@ -721,7 +713,7 @@ def test_profile_histogram_w_updates(self): # this data will be the second update of the profile. # this results in the combination of the previous data and this data. # the range should update to 12 from 3. 
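For orientation before the df2 update just below: the first expected histogram is exactly NumPy's equal-width binning, and the streaming update then re-bins the stored histogram together with the new data rather than pooling raw values, which is why the first merged bin below holds 4 rather than the 3 that pooling would give. The first step as a sketch:

    import numpy as np

    counts, edges = np.histogram([1.0, 2.0, 3.0, 4.0], bins=4)
    print(counts)  # [1 1 1 1]
    print(edges)   # [1.   1.75 2.5  3.25 4.  ]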
- df2 = pd.Series(["1.0", "5.0", "8.0", "13.0"]) + df2 = pl.Series(["1.0", "5.0", "8.0", "13.0"]) expected_histogram2 = { "bin_counts": np.array([4, 1, 1, 1, 0, 1]), "bin_edges": np.array([1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0]), @@ -746,7 +738,7 @@ def test_profile_histogram_w_updates(self): np.round(histogram["bin_edges"], 12), ) - # apply test to merging profiles + # map_elements test to merging profiles expected_histogram = { "bin_edges": np.array( [1.0, 19 / 7, 31 / 7, 43 / 7, 55 / 7, 67 / 7, 79 / 7, 13.0] @@ -772,7 +764,7 @@ def test_histogram_with_varying_number_of_bin(self): Checks the histogram with large number of bins """ # this data use number of bins less than the max limit - df1 = pd.Series([1, 2, 3, 4]).apply(str) + df1 = pl.Series([1, 2, 3, 4]).map_elements(str) profiler1 = FloatColumn(df1.name) profiler1.max_histogram_bin = 50 profiler1.update(df1) @@ -781,9 +773,9 @@ def test_histogram_with_varying_number_of_bin(self): # this data uses large number of bins, which will be set to # the max limit - df2 = pd.Series( + df2 = pl.Series( [3.195103249264023e18, 9999995.0, 9999999.0, 0.0, -(10**10)] - ).apply(str) + ).map_elements(str) profiler2 = FloatColumn(df2.name) profiler2.max_histogram_bin = 50 profiler2.update(df2) @@ -798,7 +790,7 @@ def test_histogram_with_varying_number_of_bin(self): self.assertEqual(10000, num_bins) def test_estimate_stats_from_histogram(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) profiler._stored_histogram["histogram"]["bin_counts"] = np.array([1, 2, 1]) @@ -816,7 +808,7 @@ def test_estimate_stats_from_histogram(self): self.assertEqual(expected_var, est_var) def test_total_histogram_bin_variance(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) profiler._stored_histogram["histogram"]["bin_counts"] = np.array([3, 2, 1]) @@ -864,7 +856,7 @@ def test_histogram_loss(self): self.assertEqual(expected_loss, est_loss) def test_select_method_for_histogram(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) list_method = ["auto", "fd", "doane", "scott", "rice", "sturges", "sqrt"] @@ -900,7 +892,7 @@ def test_select_method_for_histogram(self): self.assertEqual(selected_method, "sturges") def test_histogram_to_array(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) profiler._stored_histogram["histogram"]["bin_counts"] = np.array([3, 2, 1]) @@ -912,7 +904,7 @@ def test_histogram_to_array(self): self.assertEqual(expected_array, array_from_histogram.tolist()) def test_merge_histogram(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = FloatColumn(data.name) profiler.update(data) profiler._stored_histogram["histogram"]["bin_counts"] = np.array([3, 2]) @@ -936,7 +928,7 @@ def test_profiled_quantiles(self): # with equal bin size, each bin has the width of 0.75 data = ["1.0", "2.0", "3.0", "4.0"] - df = pd.Series(data) + df = pl.Series(data) profiler = FloatColumn(df.name) profiler.update(df) profile = profiler.profile @@ -960,7 +952,7 @@ def test_get_median_abs_deviation(self): """ # with different values data = ["1.0", "1.0", "1.0", "1.0", "2.0"] - df = pd.Series(data) + df = pl.Series(data) profiler = FloatColumn(df.name) profiler.update(df) profile = profiler.profile @@ -970,7 +962,7 
@@ def test_get_median_abs_deviation(self): # with unique values data = ["1.0", "1.0", "1.0", "1.0", "1.0"] - df = pd.Series(data) + df = pl.Series(data) profiler = FloatColumn(df.name) profiler.update(df) profile = profiler.profile @@ -980,7 +972,7 @@ def test_get_median_abs_deviation(self): # with negative values data = ["-1.0", "1.0", "1.0", "1.0", "2.0"] - df = pd.Series(data) + df = pl.Series(data) profiler = FloatColumn(df.name) profiler.update(df) profile = profiler.profile @@ -992,7 +984,7 @@ def test_get_median_abs_deviation(self): # in this example, 1.5 and 13.5 both have the counts 0.5 # then the median absolute deviation should be the average, 7.5 data = ["-9.0", "-8.0", "4.0", "5.0", "6.0", "7.0", "19.0", "20.0"] - df = pd.Series(data) + df = pl.Series(data) profiler = FloatColumn(df.name) profiler.update(df) profile = profiler.profile @@ -1007,12 +999,12 @@ def test_merge_median_abs_deviation(self): """ # with different values data1 = ["1.0", "1.0", "1.0", "2.0"] - df1 = pd.Series(data1) + df1 = pl.Series(data1) profiler = FloatColumn(df1.name) profiler.update(df1) data2 = ["0.0", "0.0", "2.0", "3.0", "3.0"] - df2 = pd.Series(data2) + df2 = pl.Series(data2) profiler.update(df2) profile = profiler.profile @@ -1021,12 +1013,12 @@ def test_merge_median_abs_deviation(self): # with unique values data1 = ["1.0", "1.0", "1.0", "1.0"] - df1 = pd.Series(data1) + df1 = pl.Series(data1) profiler = FloatColumn(df1.name) profiler.update(df1) data2 = ["1.0", "1.0", "1.0", "1.0", "1.0"] - df2 = pd.Series(data2) + df2 = pl.Series(data2) profiler.update(df2) profile = profiler.profile @@ -1035,19 +1027,19 @@ def test_merge_median_abs_deviation(self): def test_data_type_ratio(self): data = np.linspace(-5, 5, 4) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler = FloatColumn(df.name) profiler.update(df) self.assertEqual(profiler.data_type_ratio, 1.0) - df = pd.Series(["not a float"]) + df = pl.Series([None]) profiler.update(df) self.assertEqual(profiler.data_type_ratio, 0.8) def test_profile(self): - data = [2.5, 12.5, "not a float", 5, "not a float"] - df = pd.Series(data).apply(str) + data = [2.5, 12.5, None, 5, None] + df = pl.Series(data).map_elements(str) profiler = FloatColumn(df.name) @@ -1172,7 +1164,7 @@ def test_report(self): `remove_disabled_flag`. 
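Restating the data_type_ratio arithmetic just tested as a standalone sketch (mirroring the test above, with the post-patch polars-based update API):

    import numpy as np
    import polars as pl
    from dataprofiler.profilers import FloatColumn

    profiler = FloatColumn("col")
    profiler.update(pl.Series(np.linspace(-5, 5, 4)).map_elements(str))
    print(profiler.data_type_ratio)  # 1.0 -- 4 float matches in 4 samples
    profiler.update(pl.Series([None]))
    print(profiler.data_type_ratio)  # 0.8 -- still 4 matches, now 5 samples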
""" data = [1.1, 2.2, 3.3, 4.4] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) # With FloatOptions and remove_disabled_flag == True options = FloatOptions() @@ -1197,7 +1189,7 @@ def test_report(self): def test_option_precision(self): data = [1.1, 2.2, 3.3, 4.4] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) # Turn off precision options = FloatOptions() @@ -1221,8 +1213,8 @@ def test_option_precision(self): self.assertEqual(2, profiler.precision["sample_size"]) def test_option_timing(self): - data = [2.0, 12.5, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, 12.5, None, 6.0, None] + df = pl.Series(data).map_elements(str) options = FloatOptions() options.set({"min.is_enabled": False}) @@ -1273,13 +1265,13 @@ def test_option_timing(self): self.assertCountEqual(expected, profiler.profile["times"]) def test_profile_merge(self): - data = [2.0, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, None, 6.0, None] + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn("Float") profiler1.update(df) - data2 = [10.0, "not a float", 15.0, "not a float"] - df2 = pd.Series(data2).apply(str) + data2 = [10.0, None, 15.0, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = FloatColumn("Float") profiler2.update(df2) @@ -1332,13 +1324,13 @@ def test_profile_merge(self): self.assertCountEqual(histogram["bin_edges"], expected_histogram["bin_edges"]) def test_profile_merge_for_zeros_and_negatives(self): - data = [2.0, 8.5, "not an int", 6.0, -3, 0] - df = pd.Series(data).apply(str) + data = [2.0, 8.5, None, 6.0, -3, 0] + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn("Float") profiler1.update(df) - data2 = [0.0, 3.5, "not an int", 125.0, 0, -0.1, -88] - df2 = pd.Series(data2).apply(str) + data2 = [0.0, 3.5, None, 125.0, 0, -0.1, -88] + df2 = pl.Series(data2).map_elements(str) profiler2 = FloatColumn("Float") profiler2.update(df2) @@ -1350,14 +1342,14 @@ def test_profile_merge_for_zeros_and_negatives(self): self.assertEqual(profiler3.num_negatives, expected_profile.pop("num_negatives")) def test_profile_merge_edge_case(self): - data = [2.0, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, None, 6.0, None] + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn("Float") profiler1.update(df) profiler1.match_count = 0 - data2 = [10.0, "not a float", 15.0, "not a float"] - df2 = pd.Series(data2).apply(str) + data2 = [10.0, None, 15.0, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = FloatColumn("Float") profiler2.update(df2) @@ -1365,11 +1357,11 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler3.stddev, profiler2.stddev) # test merge with empty data - df1 = pd.Series([], dtype=object) + df1 = pl.Series([], dtype=object) profiler1 = FloatColumn("Float") profiler1.update(df1) - df2 = pd.Series([], dtype=object) + df2 = pl.Series([], dtype=object) profiler2 = FloatColumn("Float") profiler2.update(df2) @@ -1379,7 +1371,7 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler.min, None) self.assertEqual(profiler.max, None) - df3 = pd.Series([2.0, 3.0]).apply(str) + df3 = pl.Series([2.0, 3.0]).map_elements(str) profiler3 = FloatColumn("Float") profiler3.update(df3) @@ -1389,7 +1381,7 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler.min, 2.0) self.assertEqual(profiler.max, 3.0) - df4 = pd.Series([4.0, 5.0]).apply(str) + df4 = pl.Series([4.0, 
5.0]).map_elements(str) profiler4 = FloatColumn("Float") profiler4.update(df4) @@ -1401,7 +1393,7 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler.num_zeros, 0) self.assertEqual(profiler.num_negatives, 0) - df5 = pd.Series([0.0, 0.0, -1.1, -1.0]).apply(str) + df5 = pl.Series([0.0, 0.0, -1.1, -1.0]).map_elements(str) profiler5 = FloatColumn("Float") profiler5.update(df5) @@ -1416,13 +1408,13 @@ def test_custom_bin_count_merge(self): options = FloatOptions() options.histogram_and_quantiles.bin_count_or_method = 10 - data = [2.0, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, None, 6.0, None] + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn("Float", options) profiler1.update(df) - data2 = [10.0, "not a float", 15.0, "not a float"] - df2 = pd.Series(data2).apply(str) + data2 = [10.0, None, 15.0, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = FloatColumn("Float", options) profiler2.update(df2) @@ -1444,13 +1436,13 @@ def test_custom_bin_count_merge(self): def test_profile_merge_no_bin_overlap(self): - data = [2.0, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, np.nan, 6.0, np.nan] + df = pl.Series(data, dtype=pl.Float64).map_elements(str) profiler1 = FloatColumn("Float") profiler1.update(df) - data2 = [10.0, "not a float", 15.0, "not a float"] - df2 = pd.Series(data2).apply(str) + data2 = [10.0, np.nan, 15.0, np.nan] + df2 = pl.Series(data2).map_elements(str) profiler2 = FloatColumn("Float") profiler2.update(df2) @@ -1473,7 +1465,7 @@ def test_profile_merge_with_different_options(self): options.histogram_and_quantiles.bin_count_or_method = None data = [2, 4, 6, 8] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn("Float", options=options) profiler1.update(df) @@ -1484,7 +1476,7 @@ def test_profile_merge_with_different_options(self): options.histogram_and_quantiles.bin_count_or_method = None data2 = [10, 15] - df2 = pd.Series(data2).apply(str) + df2 = pl.Series(data2).map_elements(str) profiler2 = FloatColumn("Float", options=options) profiler2.update(df2) @@ -1521,7 +1513,7 @@ def test_profile_merge_with_different_options(self): options.histogram_and_quantiles.method = None data = [2, 4, 6, 8] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn("Float", options=options) profiler1.update(df) @@ -1552,13 +1544,13 @@ def test_histogram_option_integration(self): self.assertEqual(["custom"], num_profiler.histogram_bin_method_names) # case when just 1 unique value, should just set bin size to be 1 - num_profiler.update(pd.Series(["1", "1"])) + num_profiler.update(pl.Series(["1", "1"])) self.assertEqual( 1, len(num_profiler.histogram_methods["custom"]["histogram"]["bin_counts"]) ) # case when more than 1 unique value, by virtue of a streaming update - num_profiler.update(pd.Series(["2"])) + num_profiler.update(pl.Series(["2"])) self.assertEqual( 100, len(num_profiler._stored_histogram["histogram"]["bin_counts"]) ) @@ -1580,7 +1572,7 @@ def test_profile_merge_bin_edges_indices(self): 4948484957575651505156554954485054.0, ] - data = pd.Series(vals).astype(str) + data = pl.Series(vals).cast(str) data_1 = data[:5] data_2 = data[5:] @@ -1597,7 +1589,7 @@ def test_profile_merge_bin_edges_indices(self): profile_1 + profile_2 def test_invalid_values(self): - data = pd.Series(["-inf", "inf"]) + data = pl.Series(["-inf", "inf"]) profiler = FloatColumn(data.name) with self.assertWarnsRegex( @@ 
-1611,7 +1603,7 @@ def test_invalid_values(self): self.assertTrue(np.isnan(profiler._biased_kurtosis)) # Update the data - data2 = pd.Series(["-2", "-1", "1", "2", "-inf", "inf"]) + data2 = pl.Series(["-2", "-1", "1", "2", "-inf", "inf"]) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -1627,7 +1619,7 @@ def test_invalid_values(self): self.assertEqual(0, len(w)) def test_insufficient_counts(self): - data = pd.Series(["0"]) + data = pl.Series(["0"]) profiler = FloatColumn(data.name) with warnings.catch_warnings(record=True) as w: @@ -1650,7 +1642,7 @@ def test_insufficient_counts(self): ) # Update the data so that the match count is good - data2 = pd.Series(["-2", "-1", "1", "2"]) + data2 = pl.Series(["-2", "-1", "1", "2"]) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -1668,20 +1660,19 @@ def test_insufficient_counts(self): self.assertEqual(0, len(w)) def test_diff(self): - data = [2.5, 12.5, "not a float", 5, "not a float"] - df = pd.Series(data).apply(str) + data = [2.5, 12.5, None, 5, None] + df = pl.Series(data).map_elements(str) profiler1 = FloatColumn(df.name) profiler1.update(df) profile1 = profiler1.profile data = [1, 15, 0.5, 0] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler2 = FloatColumn(df.name) profiler2.update(df) profile2 = profiler2.profile # Assert the difference report is correct - diff = profiler1.diff(profiler2) expected_diff = { "max": -2.5, "mean": profile1["mean"] - profile2["mean"], @@ -1841,7 +1832,7 @@ def test_json_encode(self): @mock.patch("time.time", return_value=0.0) def test_json_encode_after_update(self, time): data = np.array([0.0, 5.0, 10.0]) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) float_options = FloatOptions() float_options.histogram_and_quantiles.bin_count_or_method = 5 @@ -1980,7 +1971,9 @@ def test_json_decode_after_update(self): # Actual deserialization # Build expected FloatColumn - df_float = pd.Series([-1.5, 2.2, 5.0, 7.0, 4.0, 3.0, 2.0, 0, 0, 9.0]).apply(str) + df_float = pl.Series( + [-1.5, 2.2, 5.0, 7.0, 4.0, 3.0, 2.0, 0, 0, 9.0] + ).map_elements(str) expected_profile = FloatColumn(fake_profile_name) with test_utils.mock_timeit(): @@ -1991,12 +1984,12 @@ def test_json_decode_after_update(self): test_utils.assert_profiles_equal(deserialized, expected_profile) - df_float = pd.Series( + df_float = pl.Series( [ 4.0, # add existing 15.0, # add new ] - ).apply(str) + ).map_elements(str) # validating update after deserialization deserialized.update(df_float) From cf685686ad436fac1f9edffff0f514c664e33ad1 Mon Sep 17 00:00:00 2001 From: Andrew Li Date: Wed, 28 Feb 2024 14:38:29 -0600 Subject: [PATCH 08/11] finish int col tests --- dataprofiler/profilers/int_column_profile.py | 11 +- .../profilers/test_int_column_profile.py | 254 +++++++++--------- 2 files changed, 128 insertions(+), 137 deletions(-) diff --git a/dataprofiler/profilers/int_column_profile.py b/dataprofiler/profilers/int_column_profile.py index 5e1ad6ee..30e7a4a8 100644 --- a/dataprofiler/profilers/int_column_profile.py +++ b/dataprofiler/profilers/int_column_profile.py @@ -150,7 +150,7 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: NumericStatsMixin._update_helper(self, df_series_clean, profile) self._update_column_base_properties(profile) - def update(self, df_series: pd.Series) -> IntColumn: + def update(self, df_series: pl.Series) -> IntColumn: """ Update the column profile. 
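PATCH 08 above collapses the 64-bit overflow detection into a dtype check: polars' native integer dtypes top out at Int64/UInt64, so integers wider than 64 bits can only arrive as Python objects in an Object-typed Series. A sketch of the distinction (constructed values, not from the tests):

    import polars as pl

    small = pl.Series([1, 2, 3])                      # Int64
    big = pl.Series([2**70, 2**80], dtype=pl.Object)  # arbitrary-precision ints

    print(small.dtype == pl.Object)  # False
    print(big.dtype == pl.Object)    # True -> sets _greater_than_64_bit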
@@ -159,14 +159,7 @@ def update(self, df_series: pd.Series) -> IntColumn: :return: updated IntColumn :rtype: IntColumn """ - self._greater_than_64_bit = ( - not df_series.empty - and df_series.apply(pd.to_numeric, errors="coerce").dtype == "O" - ) - if self._greater_than_64_bit: - df_series = pl.Series(df_series.to_list(), dtype=pl.Object) - else: - df_series = pl.from_pandas(df_series) + self._greater_than_64_bit = df_series.dtype == pl.Object if len(df_series) == 0: return self diff --git a/dataprofiler/tests/profilers/test_int_column_profile.py b/dataprofiler/tests/profilers/test_int_column_profile.py index 718348cf..f3925469 100644 --- a/dataprofiler/tests/profilers/test_int_column_profile.py +++ b/dataprofiler/tests/profilers/test_int_column_profile.py @@ -6,7 +6,7 @@ from unittest import mock import numpy as np -import pandas as pd +import polars as pl from dataprofiler.profilers import IntColumn from dataprofiler.profilers.json_decoder import load_column_profile @@ -20,7 +20,7 @@ class TestIntColumn(unittest.TestCase): def test_base_case(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = IntColumn(data.name) profiler.update(data) @@ -41,7 +41,7 @@ def test_base_case(self): self.assertIsNone(profiler.data_type_ratio) def test_single_data_variance_case(self): - data = pd.Series([1]) + data = pl.Series([1]) profiler = IntColumn(data.name) profiler.update(data) self.assertEqual(profiler.match_count, 1) @@ -49,7 +49,7 @@ def test_single_data_variance_case(self): self.assertEqual(profiler.mean, 1) self.assertTrue(profiler.variance is np.nan) - data = pd.Series([2]) + data = pl.Series([2]) profiler.update(data) self.assertEqual(profiler.match_count, 2) self.assertEqual(profiler.sum, 3) @@ -58,7 +58,7 @@ def test_single_data_variance_case(self): def test_profiled_min(self): data = np.linspace(-5, 5, 11) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler = IntColumn(df.name) profiler.update(df[1:]) @@ -67,42 +67,42 @@ def test_profiled_min(self): profiler.update(df) self.assertEqual(profiler.min, -5) - profiler.update(pd.Series(["-4"])) + profiler.update(pl.Series(["-4"])) self.assertEqual(profiler.min, -5) # empty data - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = IntColumn(data.name) profiler.update(data) self.assertEqual(profiler.min, None) # data with None value - df = pd.Series([2, 3, None, np.nan]).apply(str) + df = pl.Series([2, 3, None, np.nan]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2) # data with one value - df = pd.Series([2]).apply(str) + df = pl.Series([2]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2) # data with unique value - df = pd.Series([2, 2, 2, 2, 2]).apply(str) + df = pl.Series([2, 2, 2, 2, 2]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2) # data with unique value as zero - df = pd.Series([0, 0, 0, 0, 0]).apply(str) + df = pl.Series([0, 0, 0, 0, 0]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 0) def test_profiled_max(self): data = np.linspace(-5, 5, 11) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler = IntColumn(df.name) profiler.update(df[:-1]) @@ -111,42 +111,42 @@ def test_profiled_max(self): profiler.update(df) self.assertEqual(profiler.max, 5) - profiler.update(pd.Series(["4"])) + 
profiler.update(pl.Series(["4"])) self.assertEqual(profiler.max, 5) # empty data - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = IntColumn(data.name) profiler.update(data) self.assertEqual(profiler.max, None) # data with None value - df = pd.Series([2, 3, None, np.nan]).apply(str) + df = pl.Series([2, 3, None, np.nan]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 3) # data with one value - df = pd.Series([2]).apply(str) + df = pl.Series([2]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 2) # data with unique value - df = pd.Series([2, 2, 2, 2, 2]).apply(str) + df = pl.Series([2, 2, 2, 2, 2]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 2) # data with unique value as zero - df = pd.Series([0, 0, 0, 0, 0]).apply(str) + df = pl.Series([0, 0, 0, 0, 0]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 0) def test_profiled_mode(self): # disabled mode - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) options = IntOptions() options.mode.is_enabled = False profiler = IntColumn(df.name, options) @@ -154,43 +154,43 @@ def test_profiled_mode(self): self.assertListEqual([np.nan], profiler.mode) # same values - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertListEqual([1], profiler.mode) # multiple modes - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([1, 2, 3, 4, 5], profiler.mode, decimal=2) # with different values - df = pd.Series([1, 1, 1, 1, 2]).apply(str) + df = pl.Series([1, 1, 1, 1, 2]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([1], profiler.mode, decimal=2) # with negative values - df = pd.Series([-1, 1, 1, 1, 2, 2, 2]) + df = pl.Series([-1, 1, 1, 1, 2, 2, 2]) profiler = IntColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([1, 2], profiler.mode, decimal=2) # all unique values - df = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).apply(str) + df = pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) # By default, returns 5 of the possible modes np.testing.assert_array_almost_equal([1, 2, 3, 4, 5], profiler.mode, decimal=2) # Edge case where mode appears later in the dataset - df = pd.Series([1, 2, 3, 4, 5, 6, 6]).apply(str) + df = pl.Series([1, 2, 3, 4, 5, 6, 6]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([6], profiler.mode, decimal=2) - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) np.testing.assert_array_almost_equal([7], profiler.mode, decimal=2) @@ -198,7 +198,7 @@ def test_profiled_mode(self): def test_top_k_modes(self): # Default options options = IntOptions() - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).map_elements(str) profiler = IntColumn(df.name, options) profiler.update(df) 
self.assertEqual(5, len(profiler.mode)) @@ -206,7 +206,7 @@ def test_top_k_modes(self): # Test if top_k_modes is less than the number of modes options = IntOptions() options.mode.top_k_modes = 2 - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).map_elements(str) profiler = IntColumn(df.name, options) profiler.update(df) self.assertEqual(2, len(profiler.mode)) @@ -214,7 +214,7 @@ def test_top_k_modes(self): # Test if top_k_mode is greater than the number of modes options = IntOptions() options.mode.top_k_modes = 8 - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5]).map_elements(str) profiler = IntColumn(df.name, options) profiler.update(df) # Only 5 possible modes so return 5 @@ -222,7 +222,7 @@ def test_top_k_modes(self): def test_profiled_median(self): # disabled median - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) options = IntOptions() options.median.is_enabled = False profiler = IntColumn(df.name, options) @@ -230,31 +230,31 @@ def test_profiled_median(self): self.assertTrue(profiler.median is np.nan) # same values - df = pd.Series([1, 1, 1, 1, 1, 1, 1]).apply(str) + df = pl.Series([1, 1, 1, 1, 1, 1, 1]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(1, profiler.median) # median lies between two values s - df = pd.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6]).apply(str) + df = pl.Series([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertAlmostEqual(3.5, profiler.median, places=2) # with different values - df = pd.Series([1, 1, 1, 1, 2]).apply(str) + df = pl.Series([1, 1, 1, 1, 2]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertAlmostEqual(1, profiler.median, places=2) # with negative values - df = pd.Series([-1, 1, 1, 1, 2, 2, 2]) + df = pl.Series([-1, 1, 1, 1, 2, 2, 2]) profiler = IntColumn(df.name) profiler.update(df) self.assertAlmostEqual(1, profiler.median, places=2) # all unique values - df = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).apply(str) + df = pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertAlmostEqual(5.5, profiler.median, places=2) @@ -286,22 +286,22 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): return M2 / (count_a + count_b - 1) data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) num_profiler = IntColumn(df1.name) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertEqual(mean(df1), num_profiler.mean) self.assertEqual(var(df1), num_profiler.variance) self.assertEqual(np.sqrt(var(df1)), num_profiler.stddev) - df2_ints = df2[df2 == df2.round()] + df2_ints = df2.filter(df2 == df2.round()) variance = batch_variance( mean_a=num_profiler.mean, var_a=num_profiler.variance, @@ -310,13 +310,13 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): var_b=var(df2_ints), count_b=df2_ints.count(), ) - num_profiler.update(df2.apply(str)) - df = pd.concat([df1, df2_ints]) + num_profiler.update(df2.map_elements(str)) + df = pl.concat([df1, df2_ints]) self.assertEqual(mean(df), num_profiler.mean) self.assertEqual(variance, 
num_profiler.variance) self.assertEqual(np.sqrt(variance), num_profiler.stddev) - df3_ints = df3[df3 == df3.round()] + df3_ints = df3.filter(df3 == df3) variance = batch_variance( mean_a=num_profiler.mean, var_a=num_profiler.variance, @@ -325,118 +325,117 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): var_b=var(df3_ints), count_b=df3_ints.count(), ) - num_profiler.update(df3.apply(str)) + num_profiler.update(df3.map_elements(str)) - df = pd.concat([df1, df2_ints, df3_ints]) + df = pl.concat([df1, df2_ints.cast(pl.Float64), df3_ints.cast(pl.Float64)]) self.assertEqual(mean(df), num_profiler.mean) self.assertAlmostEqual(variance, num_profiler.variance) self.assertAlmostEqual(np.sqrt(variance), num_profiler.stddev) def test_profiled_skewness(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) num_profiler = IntColumn(df1.name) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertEqual(0, num_profiler.skewness) - df2_ints = df2[df2 == df2.round()] - num_profiler.update(df2.apply(str)) - df = pd.concat([df1, df2_ints]) + df2_ints = df2.filter(df2 == df2.round()) + num_profiler.update(df2.map_elements(str)) + df = pl.concat([df1, df2_ints]) self.assertAlmostEqual(11 * np.sqrt(102 / 91) / 91, num_profiler.skewness) - df3_ints = df3[df3 == df3.round()] - num_profiler.update(df3.apply(str)) - df = pd.concat([df1, df2_ints, df3_ints]) + df3_ints = df3.filter(df3 == df3) + num_profiler.update(df3.map_elements(str)) + df = pl.concat([df1, df2_ints.cast(pl.Float64), df3_ints.cast(pl.Float64)]) self.assertAlmostEqual(-6789 * np.sqrt(39 / 463) / 4630, num_profiler.skewness) def test_profiled_kurtosis(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) num_profiler = IntColumn(df1.name) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertAlmostEqual(-6 / 5, num_profiler.kurtosis) - df2_ints = df2[df2 == df2.round()] - num_profiler.update(df2.apply(str)) - df = pd.concat([df1, df2_ints]) + df2_ints = df2.filter(df2 == df2.round()) + num_profiler.update(df2.map_elements(str)) + df = pl.concat([df1, df2_ints]) self.assertAlmostEqual(-29886 / 41405, num_profiler.kurtosis) - df3_ints = df3[df3 == df3.round()] - num_profiler.update(df3.apply(str)) - df = pd.concat([df1, df2_ints, df3_ints]) + df3_ints = df3.filter(df3 == df3) + num_profiler.update(df3.map_elements(str)) + df = pl.concat([df1, df2_ints.cast(pl.Float64), df3_ints.cast(pl.Float64)]) self.assertAlmostEqual(16015779 / 42873800, num_profiler.kurtosis) def test_bias_correction_option(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) # Disable bias correction options = IntOptions() options.bias_correction.is_enabled = False num_profiler = IntColumn(df1.name, options=options) - num_profiler.update(df1.apply(str)) + num_profiler.update(df1.map_elements(str)) self.assertAlmostEqual(10, num_profiler.variance) self.assertAlmostEqual(0, 
num_profiler.skewness) self.assertAlmostEqual(89 / 50 - 3, num_profiler.kurtosis) - df2_ints = df2[df2 == df2.round()] - num_profiler.update(df2.apply(str)) - df = pd.concat([df1, df2_ints]) + df2_ints = df2.filter(df2 == df2.round()) + num_profiler.update(df2.map_elements(str)) + df = pl.concat([df1, df2_ints]) self.assertAlmostEqual(2184 / 289, num_profiler.variance) self.assertAlmostEqual(165 * np.sqrt(3 / 182) / 182, num_profiler.skewness) self.assertAlmostEqual(60769 / 28392 - 3, num_profiler.kurtosis) - df3_ints = df3[df3 == df3.round()] - num_profiler.update(df3.apply(str)) - df = pd.concat([df1, df2_ints, df3_ints]) + df3_ints = df3.filter(df3 == df3) + num_profiler.update(df3.map_elements(str)) + df = pl.concat([df1, df2_ints.cast(pl.Float64), df3_ints.cast(pl.Float64)]) self.assertAlmostEqual(3704 / 729, num_profiler.variance) self.assertAlmostEqual(-11315 / (926 * np.sqrt(926)), num_profiler.skewness) self.assertAlmostEqual(5305359 / 1714952 - 3, num_profiler.kurtosis) def test_bias_correction_merge(self): data = np.linspace(-5, 5, 11).tolist() - df1 = pd.Series(data) + df1 = pl.Series(data) data = np.linspace(-3, 2, 11).tolist() - df2 = pd.Series(data) + df2 = pl.Series(data) data = np.full((10,), 1) - df3 = pd.Series(data) + df3 = pl.Series(data) # Disable bias correction options = IntOptions() options.bias_correction.is_enabled = False num_profiler1 = IntColumn(df1.name, options=options) - num_profiler1.update(df1.apply(str)) + num_profiler1.update(df1.map_elements(str)) self.assertAlmostEqual(10, num_profiler1.variance) self.assertAlmostEqual(0, num_profiler1.skewness) self.assertAlmostEqual(89 / 50 - 3, num_profiler1.kurtosis) - df2_ints = df2[df2 == df2.round()] num_profiler2 = IntColumn(df2.name) - num_profiler2.update(df2.apply(str)) + num_profiler2.update(df2.map_elements(str)) num_profiler_merged = num_profiler1 + num_profiler2 # Values should stay biased values self.assertFalse(num_profiler_merged.bias_correction) @@ -446,9 +445,8 @@ def test_bias_correction_merge(self): ) self.assertAlmostEqual(60769 / 28392 - 3, num_profiler_merged.kurtosis) - df3_ints = df3[df3 == df3.round()] num_profiler3 = IntColumn(df3.name) - num_profiler3.update(df3.apply(str)) + num_profiler3.update(df3.map_elements(str)) num_profiler_merged = num_profiler1 + num_profiler2 + num_profiler3 self.assertFalse(num_profiler_merged.bias_correction) self.assertAlmostEqual(3704 / 729, num_profiler_merged.variance) @@ -492,7 +490,7 @@ def test_profiled_histogram(self): list_data_test.append([data3, expected_histogram3]) for data, expected_histogram in list_data_test: - df = pd.Series(data) + df = pl.Series(data) profiler = IntColumn(df.name) profiler.update(df) @@ -510,19 +508,19 @@ def test_profiled_histogram(self): def test_data_type_ratio(self): data = np.linspace(-5, 5, 11) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler = IntColumn(df.name) profiler.update(df) self.assertEqual(profiler.data_type_ratio, 1.0) - df = pd.Series(["not a float", "0.1"]) + df = pl.Series([None, "0.1"]) profiler.update(df) self.assertEqual(profiler.data_type_ratio, 11 / 13.0) def test_profile(self): - data = [2.0, 12.5, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, 12.5, None, 6.0, None] + df = pl.Series(data).map_elements(str) profiler = IntColumn(df.name) @@ -642,8 +640,8 @@ def test_profile(self): self.assertEqual(expected, profiler.profile["times"]) def test_option_timing(self): - data = [2.0, 12.5, "not a float", 6.0, "not a float"] - df = 
pd.Series(data).apply(str) + data = [2.0, 12.5, None, 6.0, None] + df = pl.Series(data).map_elements(str) options = IntOptions() options.set({"min.is_enabled": False}) @@ -694,13 +692,13 @@ def test_option_timing(self): def test_profile_merge(self): # Floats are not included intentionally for the test # below as this is an int column - data = [2.0, 12.5, "not an int", 6.0, "not an int"] - df = pd.Series(data).apply(str) + data = [2.0, 12.5, None, 6.0, None] + df = pl.Series(data).map_elements(str) profiler1 = IntColumn("Int") profiler1.update(df) - data2 = [10.0, 3.5, "not an int", 15.0, "not an int"] - df2 = pd.Series(data2).apply(str) + data2 = [10.0, 3.5, None, 15.0, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = IntColumn("Int") profiler2.update(df2) @@ -749,13 +747,13 @@ def test_profile_merge(self): self.assertCountEqual(histogram["bin_edges"], expected_histogram["bin_edges"]) def test_profile_merge_for_zeros_and_negatives(self): - data = [2.0, 8.5, "not an int", 6.0, -3, 0] - df = pd.Series(data).apply(str) + data = [2.0, 8.5, None, 6.0, -3, 0] + df = pl.Series(data).map_elements(str) profiler1 = IntColumn("Int") profiler1.update(df) - data2 = [0.0, 3.5, "not an int", 125.0, 0, -0.1, -88] - df2 = pd.Series(data2).apply(str) + data2 = [0.0, 3.5, None, 125.0, 0, -0.1, -88] + df2 = pl.Series(data2).map_elements(str) profiler2 = IntColumn("Int") profiler2.update(df2) @@ -767,14 +765,14 @@ def test_profile_merge_for_zeros_and_negatives(self): self.assertEqual(profiler3.num_negatives, expected_profile.pop("num_negatives")) def test_profile_merge_edge_case(self): - data = [2.0, 12.5, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = [2.0, 12.5, None, 6.0, None] + df = pl.Series(data).map_elements(str) profiler1 = IntColumn(name="Int") profiler1.update(df) profiler1.match_count = 0 - data2 = [10.0, 3.5, "not a float", 15.0, "not a float"] - df2 = pd.Series(data2).apply(str) + data2 = [10.0, 3.5, None, 15.0, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = IntColumn(name="Int") profiler2.update(df2) @@ -782,11 +780,11 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler3.stddev, profiler2.stddev) # test merge with empty data - df1 = pd.Series([], dtype=object) + df1 = pl.Series([], dtype=object) profiler1 = IntColumn("Int") profiler1.update(df1) - df2 = pd.Series([], dtype=object) + df2 = pl.Series([], dtype=object) profiler2 = IntColumn("Int") profiler2.update(df2) @@ -797,7 +795,7 @@ def test_profile_merge_edge_case(self): self.assertTrue(np.isnan(profiler.kurtosis)) self.assertIsNone(profiler.histogram_selection) - df3 = pd.Series([2, 3]).apply(str) + df3 = pl.Series([2, 3]).map_elements(str) profiler3 = IntColumn("Int") profiler3.update(df3) @@ -809,7 +807,7 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler.num_zeros, 0) self.assertEqual(profiler.num_negatives, 0) - df4 = pd.Series([4, 5]).apply(str) + df4 = pl.Series([4, 5]).map_elements(str) profiler4 = IntColumn("Int") profiler4.update(df4) @@ -821,7 +819,7 @@ def test_profile_merge_edge_case(self): self.assertEqual(profiler.num_zeros, 0) self.assertEqual(profiler.num_negatives, 0) - df5 = pd.Series([0, 0, -1]).apply(str) + df5 = pl.Series([0, 0, -1]).map_elements(str) profiler5 = IntColumn("Int") profiler5.update(df5) @@ -836,13 +834,13 @@ def test_custom_bin_count_merge(self): options = IntOptions() options.histogram_and_quantiles.bin_count_or_method = 10 - data = [2, "not an int", 6, "not an int"] - df = pd.Series(data).apply(str) + data = [2, 
None, 6, None] + df = pl.Series(data).map_elements(str) profiler1 = IntColumn("Int", options) profiler1.update(df) - data2 = [10, "not an int", 15, "not an int"] - df2 = pd.Series(data2).apply(str) + data2 = [10, None, 15, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = IntColumn("Int", options) profiler2.update(df2) @@ -866,14 +864,14 @@ def test_custom_bin_count_merge(self): def test_profile_merge_no_bin_overlap(self): - data = [2, "not an int", 6, "not an int"] - df = pd.Series(data).apply(str) + data = [2, None, 6, None] + df = pl.Series(data).map_elements(str) profiler1 = IntColumn("Int") profiler1.update(df) profiler1.match_count = 0 - data2 = [10, "not an int", 15, "not an int"] - df2 = pd.Series(data2).apply(str) + data2 = [10, None, 15, None] + df2 = pl.Series(data2).map_elements(str) profiler2 = IntColumn("Int") profiler2.update(df2) @@ -895,7 +893,7 @@ def test_profile_merge_with_different_options(self): options.min.is_enabled = False data = [2, 4, 6, 8] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler1 = IntColumn("Int", options=options) profiler1.update(df) profiler1.match_count = 0 @@ -904,7 +902,7 @@ def test_profile_merge_with_different_options(self): options = IntOptions() options.min.is_enabled = False data2 = [10, 15] - df2 = pd.Series(data2).apply(str) + df2 = pl.Series(data2).map_elements(str) profiler2 = IntColumn("Int", options=options) profiler2.update(df2) @@ -950,13 +948,13 @@ def test_histogram_option_integration(self): self.assertEqual(["custom"], num_profiler.histogram_bin_method_names) # case when just 1 unique value, should just set bin size to be 1 - num_profiler.update(pd.Series(["1", "1"])) + num_profiler.update(pl.Series(["1", "1"])) self.assertEqual( 1, len(num_profiler.histogram_methods["custom"]["histogram"]["bin_counts"]) ) # case when more than 1 unique value, by virtue of a streaming update - num_profiler.update(pd.Series(["2"])) + num_profiler.update(pl.Series(["2"])) self.assertEqual( 100, len(num_profiler._stored_histogram["histogram"]["bin_counts"]) ) @@ -978,7 +976,7 @@ def test_profile_merge_bin_edges_indices(self): 4948484957575651505156554954485054, ] - data = pd.Series(vals) + data = pl.Series(vals, dtype=pl.Object) data_1 = data[:5] data_2 = data[5:] @@ -995,7 +993,7 @@ def test_profile_merge_bin_edges_indices(self): profile_1 + profile_2 def test_insufficient_counts(self): - data = pd.Series(["1"]) + data = pl.Series(["1"]) profiler = IntColumn(data.name) with warnings.catch_warnings(record=True) as w: @@ -1018,7 +1016,7 @@ def test_insufficient_counts(self): ) # Update the data so that the match count is good - data2 = pd.Series(["-2", "-1", "1", "2"]) + data2 = pl.Series(["-2", "-1", "1", "2"]) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -1039,13 +1037,13 @@ def test_diff(self): """ Makes sure the IntColumn Diff() works appropriately. 
""" - data = [2, "not an int", 6, 4] - df = pd.Series(data).apply(str) + data = [2, None, 6, 4] + df = pl.Series(data).map_elements(str) profiler1 = IntColumn("Int") profiler1.update(df) data = [1, 15] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler2 = IntColumn("Int") profiler2.update(df) @@ -1189,7 +1187,7 @@ def test_json_encode(self): @mock.patch("time.time", return_value=0.0) def test_json_encode_after_update(self, time): data = np.array([0, 5, 10]) - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) int_options = IntOptions() int_options.histogram_and_quantiles.bin_count_or_method = 5 @@ -1317,7 +1315,7 @@ def test_json_decode_after_update(self): # Actual deserialization # Build expected IntColumn - df_int = pd.Series([-1, 2, 5, 7, 4, 3, 2, 0, 0, 9]) + df_int = pl.Series([-1, 2, 5, 7, 4, 3, 2, 0, 0, 9]) expected_profile = IntColumn(fake_profile_name) with test_utils.mock_timeit(): @@ -1333,7 +1331,7 @@ def test_json_decode_after_update(self): deserialized.report() test_utils.assert_profiles_equal(deserialized, expected_profile) - df_int = pd.Series( + df_int = pl.Series( [ 4, # add existing 15, # add new From c0d90a2dce5e2328bb028fb6e25d283a370ceaa5 Mon Sep 17 00:00:00 2001 From: Andrew Li Date: Wed, 28 Feb 2024 16:39:24 -0600 Subject: [PATCH 09/11] update text profiler tests --- dataprofiler/profilers/text_column_profile.py | 13 +- .../profilers/test_text_column_profile.py | 126 +++++++++--------- 2 files changed, 71 insertions(+), 68 deletions(-) diff --git a/dataprofiler/profilers/text_column_profile.py b/dataprofiler/profilers/text_column_profile.py index 5e5098f6..f2ea321e 100644 --- a/dataprofiler/profilers/text_column_profile.py +++ b/dataprofiler/profilers/text_column_profile.py @@ -4,7 +4,6 @@ import itertools import numpy as np -import pandas as pd import polars as pl from . import profiler_utils @@ -166,12 +165,14 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: """ if self._NumericStatsMixin__calculations: text_lengths = df_series_clean.str.len_chars() - NumericStatsMixin._update_helper(self, text_lengths.to_pandas(), profile) + NumericStatsMixin._update_helper( + self, text_lengths.drop_nulls().to_pandas(), profile + ) self._update_column_base_properties(profile) if self.max: self.type = "string" if self.max <= 255 else "text" - def update(self, df_series: pd.Series) -> TextColumn: + def update(self, df_series: pl.Series) -> TextColumn: """ Update the column profile. 
@@ -180,17 +181,17 @@ def update(self, df_series: pd.Series) -> TextColumn: :return: updated TextColumn :rtype: TextColumn """ - df_series = pl.from_pandas(df_series) len_df = len(df_series) if len_df == 0: return self - profile = dict(match_count=len_df, sample_size=len_df) + no_nulls_length = len(df_series.drop_nulls()) + profile = dict(match_count=no_nulls_length, sample_size=no_nulls_length) BaseColumnProfiler._perform_property_calcs( self, self.__calculations, - df_series=df_series, + df_series=df_series.drop_nulls(), prev_dependent_properties={}, subset_properties=profile, ) diff --git a/dataprofiler/tests/profilers/test_text_column_profile.py b/dataprofiler/tests/profilers/test_text_column_profile.py index 61a54afe..c9e37ab4 100644 --- a/dataprofiler/tests/profilers/test_text_column_profile.py +++ b/dataprofiler/tests/profilers/test_text_column_profile.py @@ -6,7 +6,7 @@ from unittest import mock import numpy as np -import pandas as pd +import polars as pl from dataprofiler.profilers import TextColumn, profiler_utils from dataprofiler.profilers.json_decoder import load_column_profile @@ -26,7 +26,7 @@ def test_profiled_vocab(self): Checks whether the vocab list for the profiler is correct. :return: """ - df1 = pd.Series( + df1 = pl.Series( [ "abcd", "aa", @@ -39,35 +39,35 @@ def test_profiled_vocab(self): "dfd", "2", ] - ).apply(str) - df2 = pd.Series( + ).map_elements(str) + df2 = pl.Series( ["1", "1", "ee", "ff", "ff", "gg", "gg", "abcd", "aa", "b", "ee", "b"] - ).apply(str) - df3 = pd.Series( + ).map_elements(str) + df3 = pl.Series( [ "NaN", "b", "nan", "c", ] - ).apply(str) + ).map_elements(str) text_profiler = TextColumn(df1.name) text_profiler.update(df1) - unique_vocab = dict.fromkeys("".join(df1.tolist())).keys() + unique_vocab = dict.fromkeys("".join(df1.to_list())).keys() self.assertCountEqual(unique_vocab, text_profiler.vocab) self.assertCountEqual(set(text_profiler.vocab), text_profiler.vocab) text_profiler.update(df2) - df = pd.concat([df1, df2]) - unique_vocab = dict.fromkeys("".join(df.tolist())).keys() + df = pl.concat([df1, df2]) + unique_vocab = dict.fromkeys("".join(df.to_list())).keys() self.assertCountEqual(unique_vocab, text_profiler.vocab) self.assertCountEqual(set(text_profiler.vocab), text_profiler.vocab) text_profiler.update(df3) - df = pd.concat([df1, df2, df3]) - unique_vocab = dict.fromkeys("".join(df.tolist())).keys() + df = pl.concat([df1, df2, df3]) + unique_vocab = dict.fromkeys("".join(df.to_list())).keys() self.assertCountEqual(unique_vocab, text_profiler.vocab) def test_profiled_str_numerics(self): @@ -96,7 +96,7 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): M2 = m_a + m_b + delta**2 * count_a * count_b / (count_a + count_b) return M2 / (count_a + count_b - 1) - df1 = pd.Series( + df1 = pl.Series( [ "abcd", "aa", @@ -110,11 +110,11 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): "2", np.nan, ] - ).apply(str) - df2 = pd.Series( + ).map_elements(str) + df2 = pl.Series( ["1", "1", "ee", "ff", "ff", "gg", "gg", "abcd", "aa", "b", "ee", "b"] - ).apply(str) - df3 = pd.Series( + ).map_elements(str) + df3 = pl.Series( [ "NaN", "b", @@ -122,26 +122,28 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): "c", None, ] - ).apply(str) + ).map_elements(str) text_profiler = TextColumn(df1.name) text_profiler.update(df1) - - self.assertEqual(mean(df1.str.len()), text_profiler.mean) - self.assertAlmostEqual(var(df1.str.len()), text_profiler.variance) - 
self.assertAlmostEqual(np.sqrt(var(df1.str.len())), text_profiler.stddev) - + self.assertEqual(mean(df1.str.len_chars().drop_nulls()), text_profiler.mean) + self.assertAlmostEqual( + var(df1.str.len_chars().drop_nulls()), text_profiler.variance + ) + self.assertAlmostEqual( + np.sqrt(var(df1.str.len_chars().drop_nulls())), text_profiler.stddev + ) variance = batch_variance( mean_a=text_profiler.mean, var_a=text_profiler.variance, count_a=text_profiler.sample_size, - mean_b=mean(df2.str.len()), - var_b=var(df2.str.len()), + mean_b=mean(df2.str.len_chars()), + var_b=var(df2.str.len_chars()), count_b=df2.count(), ) text_profiler.update(df2) - df = pd.concat([df1, df2]) - self.assertEqual(df.str.len().mean(), text_profiler.mean) + df = pl.concat([df1, df2]) + self.assertEqual(df.str.len_chars().drop_nulls().mean(), text_profiler.mean) self.assertAlmostEqual(variance, text_profiler.variance) self.assertAlmostEqual(np.sqrt(variance), text_profiler.stddev) @@ -149,19 +151,19 @@ def batch_variance(mean_a, var_a, count_a, mean_b, var_b, count_b): mean_a=text_profiler.mean, var_a=text_profiler.variance, count_a=text_profiler.match_count, - mean_b=mean(df3.str.len()), - var_b=var(df3.str.len()), + mean_b=mean(df3.str.len_chars().drop_nulls()), + var_b=var(df3.str.len_chars().drop_nulls()), count_b=df3.count(), ) text_profiler.update(df3) - df = pd.concat([df1, df2, df3]) - self.assertEqual(df.str.len().mean(), text_profiler.mean) + df = pl.concat([df1, df2, df3]) + self.assertEqual(df.str.len_chars().drop_nulls().mean(), text_profiler.mean) self.assertAlmostEqual(variance, text_profiler.variance) self.assertAlmostEqual(np.sqrt(variance), text_profiler.stddev) def test_base_case(self): - data = pd.Series([], dtype=object) + data = pl.Series([], dtype=object) profiler = TextColumn(data.name) profiler.update(data) profiler.update(data) # intentional to validate no changes if empty @@ -174,7 +176,7 @@ def test_base_case(self): def test_data_ratio(self): # should always be 1.0 unless empty - df1 = pd.Series( + df1 = pl.Series( [ "abcd", "aa", @@ -187,7 +189,7 @@ def test_data_ratio(self): "dfd", "2", ] - ).apply(str) + ).map_elements(str) profiler = TextColumn(df1.name) profiler.update(df1) @@ -198,31 +200,31 @@ def test_data_ratio(self): self.assertEqual(profiler.data_type_ratio, 1.0) def test_profiled_min(self): - df = pd.Series(["aaa", "aa", "aaaa", "aaa"]).apply(str) + df = pl.Series(["aaa", "aa", "aaaa", "aaa"]).map_elements(str) profiler = TextColumn(df.name) profiler.update(df) self.assertEqual(profiler.min, 2) - df = pd.Series(["aa", "a"]).apply(str) + df = pl.Series(["aa", "a"]).map_elements(str) profiler.update(df) self.assertEqual(profiler.min, 1) def test_profiled_max(self): - df = pd.Series(["a", "aa", "a", "a"]).apply(str) + df = pl.Series(["a", "aa", "a", "a"]).map_elements(str) profiler = TextColumn(df.name) profiler.update(df) self.assertEqual(profiler.max, 2) - df = pd.Series(["aa", "aaa", "a"]).apply(str) + df = pl.Series(["aa", "aaa", "a"]).map_elements(str) profiler.update(df) self.assertEqual(profiler.max, 3) def test_profile(self): - df = pd.Series( + df = pl.Series( ["abcd", "aa", "abcd", "aa", "b", "4", "3", "2", "dfd", "2"] - ).apply(str) + ).map_elements(str) profiler = TextColumn(df.name) expected_profile = dict( min=1.0, @@ -302,7 +304,7 @@ def test_report(self): `remove_disabled_flag`. 
""" data = [2.0, 12.5, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) options = TextOptions() # With TextOptions and remove_disabled_flag == True options.vocab.is_enabled = False @@ -327,8 +329,8 @@ def test_report(self): self.assertIn("vocab", report_keys) def test_option_timing(self): - data = [2.0, 12.5, "not a float", 6.0, "not a float"] - df = pd.Series(data).apply(str) + data = ["2.0", "12.5", "not a float", "6.0", "not a float"] + df = pl.Series(data).map_elements(str) options = TextOptions() options.set({"min.is_enabled": False}) @@ -376,13 +378,13 @@ def test_option_timing(self): self.assertCountEqual(expected, profiler.profile["times"]) def test_merge_profile(self): - df = pd.Series( + df = pl.Series( ["abcd", "aa", "abcd", "aa", "b", "4", "3", "2", "dfd", "2"] - ).apply(str) + ).map_elements(str) - df2 = pd.Series( + df2 = pl.Series( ["hello", "my", "name", "is", "Grant", "I", "have", "67", "dogs"] - ).apply(str) + ).map_elements(str) expected_vocab = [ "a", @@ -450,7 +452,7 @@ def test_profile_merge_with_different_options(self): options.min.is_enabled = False options.histogram_and_quantiles.bin_count_or_method = None - df = pd.Series( + df = pl.Series( ["pancake", "banana", "lighthouse", "aa", "b", "4", "3", "2", "dfd", "2"] ) @@ -463,7 +465,7 @@ def test_profile_merge_with_different_options(self): options.max.is_enabled = False options.vocab.is_enabled = False options.histogram_and_quantiles.bin_count_or_method = None - df2 = pd.Series( + df2 = pl.Series( ["hello", "my", "name", "is", "Grant", "I", "have", "67", "dogs"] ) profiler2 = TextColumn("Text", options=options) @@ -499,12 +501,12 @@ def test_custom_bin_count_merge(self): options.histogram_and_quantiles.bin_count_or_method = 10 data = ["this", "is", "a", "test"] - df = pd.Series(data).apply(str) + df = pl.Series(data).map_elements(str) profiler1 = TextColumn("Float", options) profiler1.update(df) data2 = ["this", "is", "another", "test"] - df2 = pd.Series(data2).apply(str) + df2 = pl.Series(data2).map_elements(str) profiler2 = TextColumn("Float", options) profiler2.update(df2) @@ -543,13 +545,13 @@ def test_histogram_option_integration(self): self.assertEqual(["custom"], num_profiler.histogram_bin_method_names) # case when just 1 unique value, should just set bin size to be 1 - num_profiler.update(pd.Series(["1", "1"])) + num_profiler.update(pl.Series(["1", "1"])) self.assertEqual( 1, len(num_profiler.histogram_methods["custom"]["histogram"]["bin_counts"]) ) # case when more than 1 unique value, by virtue of a streaming update - num_profiler.update(pd.Series(["22"])) + num_profiler.update(pl.Series(["22"])) self.assertEqual( 100, len(num_profiler._stored_histogram["histogram"]["bin_counts"]) ) @@ -558,13 +560,13 @@ def test_histogram_option_integration(self): self.assertEqual(100, len(histogram["bin_counts"])) def test_diff(self): - df = pd.Series( + df = pl.Series( ["abcd", "aa", "abcd", "aa", "b", "4", "3", "2", "dfd", "2"] - ).apply(str) + ).map_elements(str) - df2 = pd.Series( + df2 = pl.Series( ["hello", "my", "name", "is", "Grant", "I", "have", "67", "dogs"] - ).apply(str) + ).map_elements(str) profiler1 = TextColumn(df.name) profiler1.update(df) @@ -616,7 +618,7 @@ def test_diff(self): @mock.patch("time.time", return_value=0.0) def test_json_encode_after_update(self, time): - df = pd.Series( + df = pl.Series( [ "abcd", "aa", @@ -630,7 +632,7 @@ def test_json_encode_after_update(self, time): "2", "12.32", ] - ).apply(str) + ).map_elements(str) 
text_options = TextOptions() text_options.histogram_and_quantiles.bin_count_or_method = 5 @@ -711,7 +713,7 @@ def test_json_encode_after_update(self, time): "kurtosis": "_get_kurtosis", "histogram_and_quantiles": "_get_histogram_and_quantiles", }, - "name": None, + "name": "", "col_index": np.nan, "sample_size": 11, "metadata": {}, @@ -754,7 +756,7 @@ def test_json_decode_after_update(self): # Actual deserialization # Build expected IntColumn - df_int = pd.Series( + df_int = pl.Series( [ "abcd", "aa", @@ -784,7 +786,7 @@ def test_json_decode_after_update(self): deserialized.report() test_utils.assert_profiles_equal(deserialized, expected_profile) - df_str = pd.Series( + df_str = pl.Series( [ "aa", # add existing "awsome", # add new From a9da02eed0348e94254389b197a28f830dfb0849 Mon Sep 17 00:00:00 2001 From: Andrew Li Date: Sun, 3 Mar 2024 17:01:25 -0600 Subject: [PATCH 10/11] fully finished --- .../profilers/float_column_profile.py | 17 +- dataprofiler/profilers/int_column_profile.py | 8 +- .../profilers/numerical_column_stats.py | 153 ++++++++---------- dataprofiler/profilers/text_column_profile.py | 10 +- .../test_numeric_stats_mixin_profile.py | 18 +-- 5 files changed, 95 insertions(+), 111 deletions(-) diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py index f0783747..19bb19c6 100644 --- a/dataprofiler/profilers/float_column_profile.py +++ b/dataprofiler/profilers/float_column_profile.py @@ -280,7 +280,7 @@ def _get_float_precision( :param df_series_clean: df series with nulls removed, assumes all values are floats as well - :type df_series_clean: pandas.core.series.Series + :type df_series_clean: polars.series.series.Series :param sample_ratio: Ratio of samples used for float precision :type sample_ratio: float (between 0 and 1) :return: string representing its precision print format @@ -332,9 +332,9 @@ def _is_each_row_float(cls, df_series: pl.Series) -> pl.Series: For column [1.0, np.NaN, 1.0] returns [True, True, True] For column [1.0, "a", "b"] returns [True, False, False] :param df_series: series of values to evaluate - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: is_float_col - :rtype: Union[List[bool], pandas.Series[bool]] + :rtype: pl.Series """ if len(df_series) == 0: return pl.Series() @@ -361,7 +361,7 @@ def _update_precision( subset before they are merged into the main data profile. :type subset_properties: dict :param df_series: Data to be profiled - :type df_series: pandas.DataFrame + :type df_series: polars.DataFrame :return: None """ sample_ratio = None @@ -403,19 +403,18 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: Update column profile properties with cleaned dataset and its known profile. :param df_series_clean: df series with nulls removed - :type df_series_clean: pandas.core.series.Series + :type df_series_clean: polars.series.series.Series :param profile: float profile dictionary :type profile: dict :return: None """ - df_series_clean = df_series_clean.to_pandas() if self._NumericStatsMixin__calculations: NumericStatsMixin._update_helper(self, df_series_clean, profile) self._update_column_base_properties(profile) def _update_numeric_stats( self, - df_series: pl.DataFrame, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -430,7 +429,7 @@ def _update_numeric_stats( subset before they are merged into the main data profile. 
:type subset_properties: Dict :param df_series: Data to be profiled - :type df_series: Pandas Dataframe + :type df_series: Polars Dataframe :return: None """ super()._update_helper(df_series, subset_properties) @@ -440,7 +439,7 @@ def update(self, df_series: pl.Series) -> FloatColumn: Update the column profile. :param df_series: df series - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: updated FloatColumn :rtype: FloatColumn """ diff --git a/dataprofiler/profilers/int_column_profile.py b/dataprofiler/profilers/int_column_profile.py index 30e7a4a8..15394ce8 100644 --- a/dataprofiler/profilers/int_column_profile.py +++ b/dataprofiler/profilers/int_column_profile.py @@ -2,7 +2,6 @@ from __future__ import annotations import numpy as np -import pandas as pd import polars as pl from .base_column_profilers import BaseColumnPrimitiveTypeProfiler, BaseColumnProfiler @@ -125,7 +124,7 @@ def _is_each_row_int(cls, df_series: pl.Series) -> list[bool]: For column [1.1 1.1 1.1] returns False :param df_series: series of values to evaluate - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: is_int_col :rtype: list """ @@ -140,12 +139,11 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: Update col profile properties with clean dataset and its known null params. :param df_series_clean: df series with nulls removed - :type df_series_clean: pandas.core.series.Series + :type df_series_clean: polars.series.series.Series :param profile: int profile dictionary :type profile: dict :return: None """ - df_series_clean = pd.Series(df_series_clean.to_numpy()) if self._NumericStatsMixin__calculations: NumericStatsMixin._update_helper(self, df_series_clean, profile) self._update_column_base_properties(profile) @@ -155,7 +153,7 @@ def update(self, df_series: pl.Series) -> IntColumn: Update the column profile. 
:param df_series: df series - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: updated IntColumn :rtype: IntColumn """ diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py index 549fcc43..0e8677d2 100644 --- a/dataprofiler/profilers/numerical_column_stats.py +++ b/dataprofiler/profilers/numerical_column_stats.py @@ -10,7 +10,6 @@ import numpy as np import numpy.typing as npt -import pandas as pd import polars as pl import scipy.stats @@ -498,7 +497,6 @@ def diff( "Unsupported operand type(s) for diff: '{}' " "and '{}'".format(cls.__name__, other_profile.__class__.__name__) ) - print(self.variance, other_profile.variance) differences = { "min": profiler_utils.find_diff_of_numbers(self.min, other_profile.min), "max": profiler_utils.find_diff_of_numbers(self.max, other_profile.max), @@ -1125,10 +1123,9 @@ def _estimate_stats_from_histogram(self) -> np.float64: return var def _total_histogram_bin_variance( - self, input_array: np.ndarray | pd.Series + self, input_array: np.ndarray | pl.Series ) -> float: - if type(input_array) is pd.Series: - input_array = pl.from_pandas(input_array) + if type(input_array) is pl.Series: input_array = input_array.to_numpy() # calculate total variance over all bins of a histogram bin_counts = self._stored_histogram["histogram"]["bin_counts"] @@ -1146,20 +1143,18 @@ def _total_histogram_bin_variance( sum_var += bin_var return sum_var - def _histogram_bin_error(self, input_array: np.ndarray | pd.Series) -> np.float64: + def _histogram_bin_error(self, input_array: np.ndarray | pl.Series) -> np.float64: """ Calculate error of each value from bin of the histogram it falls within. :param input_array: input data used to calculate the histogram - :type input_array: Union[np.array, pd.Series] + :type input_array: Union[np.array, pl.Series] :return: binning error :rtype: float """ - if type(input_array) is pd.Series: - input_array = pl.from_pandas(input_array) + if type(input_array) == pl.Series: input_array = input_array.to_numpy() - bin_edges = self._stored_histogram["histogram"]["bin_edges"] - + bin_edges = self._stored_histogram["histogram"]["bin_edges"].astype(float) # account ofr digitize which is exclusive bin_edges = bin_edges.copy() @@ -1280,7 +1275,7 @@ def _get_histogram( Uses np.histogram. :param values: input data values - :type values: Union[np.array, pd.Series] + :type values: Union[np.array, pl.Series] :return: bin edges and bin counts """ if len(np.unique(values)) == 1: @@ -1323,18 +1318,17 @@ def _get_histogram( bin_counts, bin_edges = np.histogram(values, bins=n_equal_bins) return bin_counts, bin_edges - def _merge_histogram(self, values: np.ndarray | pd.Series) -> None: + def _merge_histogram(self, values: np.ndarray | pl.Series) -> None: # values is the current array of values, # that needs to be updated to the accumulated histogram - if type(values) is pd.Series: - values = pl.from_pandas(values) + if type(values) == pl.Series: values = values.to_numpy() combined_values = np.concatenate([values, self._histogram_to_array()]) bin_counts, bin_edges = self._get_histogram(combined_values) self._stored_histogram["histogram"]["bin_counts"] = bin_counts self._stored_histogram["histogram"]["bin_edges"] = bin_edges - def _update_histogram(self, df_series: pd.Series | np.ndarray) -> None: + def _update_histogram(self, df_series: pl.Series) -> None: """ Update histogram for each method and the combined method. 
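Before the histogram hunks below, the binning-error idea may help in isolation: each value is charged the squared distance to the midpoint of the histogram bin it lands in, and np.digitize is right-exclusive, so the top edge has to be clipped into the last bin. A simplified sketch of that idea, not the library's exact loss computation:

import numpy as np

values = np.array([1.0, 2.2, 3.9, 7.5])
bin_counts, bin_edges = np.histogram(values, bins=3)

# Midpoint of each bin.
mids = 0.5 * (bin_edges[:-1] + bin_edges[1:])
# Map each value to its bin; clip so the max value lands in the last bin.
idx = np.clip(np.digitize(values, bin_edges) - 1, 0, len(mids) - 1)
binning_error = float(np.sum((values - mids[idx]) ** 2))
print(bin_counts, binning_error)
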
@@ -1352,30 +1346,31 @@ def _update_histogram(self, df_series: pd.Series | np.ndarray) -> None: accumulated losses, and the best method with minimal loss is picked :param df_series: a given column - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: """ - if self._greater_than_64_bit and type(df_series) is pd.Series: - df_series = df_series.to_numpy(dtype=float) - df_series = df_series[np.isfinite(df_series)] - if df_series.size == 0: + if self._greater_than_64_bit: + df_np_series = df_series.to_numpy() + df_np_series = df_np_series[np.isfinite(df_np_series)] + if df_np_series.size == 0: return + if self._has_histogram: + self._merge_histogram(df_np_series) + else: + bin_counts, bin_edges = self._get_histogram(df_np_series) + self._stored_histogram["histogram"]["bin_counts"] = bin_counts + self._stored_histogram["histogram"]["bin_edges"] = bin_edges else: - df_series = pl.from_pandas(df_series, nan_to_null=True).cast(pl.Float64) - df_series = df_series.replace([np.inf, -np.inf], [None]) # type: ignore - df_series = df_series.drop_nulls() + df_series = df_series.filter(~df_series.is_infinite()) + df_series = df_series.drop_nans() if df_series.is_empty(): return - - if self._has_histogram: - if self._greater_than_64_bit: - self._merge_histogram(df_series.tolist()) + if self._has_histogram: + self._merge_histogram(df_series) else: - self._merge_histogram(df_series.to_list()) - else: - bin_counts, bin_edges = self._get_histogram(df_series) - self._stored_histogram["histogram"]["bin_counts"] = bin_counts - self._stored_histogram["histogram"]["bin_edges"] = bin_edges + bin_counts, bin_edges = self._get_histogram(df_series) + self._stored_histogram["histogram"]["bin_counts"] = bin_counts + self._stored_histogram["histogram"]["bin_edges"] = bin_edges # update loss for the stored bins histogram_loss = self._histogram_bin_error(df_series) @@ -1749,36 +1744,30 @@ def _get_quantiles(self) -> None: ] self.quantiles = self._get_percentile(percentiles=percentiles) - def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None: + def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: """ Update base numerical profile properties w/ clean dataset and known null params. 
:param df_series_clean: df series with nulls removed - :type df_series_clean: pandas.core.series.Series + :type df_series_clean: polars.series.series.Series :param profile: numerical profile dictionary :type profile: dict :return: None """ - self._greater_than_64_bit = ( - not df_series_clean.empty - and df_series_clean.apply(pd.to_numeric, errors="coerce").dtype == "O" - ) + self._greater_than_64_bit = df_series_clean.dtype == pl.Object if self._greater_than_64_bit: - df_series_clean = df_series_clean.to_numpy() - df_series_clean = df_series_clean[df_series_clean != np.nan] - if df_series_clean.size == 0: + df_np_series_clean = df_series_clean.to_numpy() + df_np_series_clean = df_np_series_clean[df_np_series_clean != np.nan] + if df_np_series_clean.size == 0: return - df_series_clean = pd.Series(df_series_clean) + df_series_clean = pl.Series(df_np_series_clean) else: - df_series_clean = pl.from_pandas(df_series_clean) if df_series_clean.dtype == pl.String: df_series_clean = df_series_clean.str.strip_chars().cast(pl.Float64) else: df_series_clean = df_series_clean.cast(pl.Float64) if df_series_clean.is_empty(): return - df_series_clean = df_series_clean.to_pandas() - df_series_clean = df_series_clean.astype(float) prev_dependent_properties = { "mean": self.mean, @@ -1800,15 +1789,14 @@ def _update_helper(self, df_series_clean: pd.Series, profile: dict) -> None: @BaseColumnProfiler._timeit(name="min") def _get_min( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: if self._greater_than_64_bit: - min_value = np.min(df_series) + min_value = min(df_series) self.min = min_value if not self.min else min(self.min, min_value) else: - df_series = pl.from_pandas(df_series) min_value = df_series.min() self.min = np.float64( min_value if not self.min else min(self.min, min_value) @@ -1818,15 +1806,14 @@ def _get_min( @BaseColumnProfiler._timeit(name="max") def _get_max( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: if self._greater_than_64_bit: - max_value = np.max(df_series) + max_value = max(df_series) self.max = max_value if not self.max else max(self.max, max_value) else: - df_series = pl.from_pandas(df_series) max_value = df_series.max() if self.max is not None: max_value = type(self.max)(max_value) @@ -1838,14 +1825,14 @@ def _get_max( @BaseColumnProfiler._timeit(name="sum") def _get_sum( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: if np.isinf(self.sum) or (np.isnan(self.sum) and self.match_count > 0): return if self._greater_than_64_bit: - sum_value = np.sum(df_series) + sum_value = float(sum(df_series)) if len(df_series) > 0 and sum_value == np.nan: warnings.warn( "Infinite or invalid values found in data. 
" @@ -1854,7 +1841,6 @@ def _get_sum( RuntimeWarning, ) else: - df_series = pl.from_pandas(df_series) sum_value = df_series.sum() if np.isinf(sum_value) or (len(df_series) > 0 and np.isnan(sum_value)): warnings.warn( @@ -1870,7 +1856,7 @@ def _get_sum( @BaseColumnProfiler._timeit(name="variance") def _get_variance( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -1879,9 +1865,8 @@ def _get_variance( ): return if self._greater_than_64_bit: - batch_biased_variance = np.var(df_series) + batch_biased_variance = np.var(df_series.to_numpy()) else: - df_series = pl.from_pandas(df_series) batch_biased_variance = np.var([df_series]) subset_properties["biased_variance"] = batch_biased_variance sum_value = subset_properties["sum"] @@ -1900,7 +1885,7 @@ def _get_variance( @BaseColumnProfiler._timeit(name="skewness") def _get_skewness( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -1908,7 +1893,7 @@ def _get_skewness( Compute and update skewness of current dataset given new chunk. :param df_series: incoming data - :type df_series: pandas series + :type df_series: polars series :param prev_dependent_properties: pre-update values needed for computation :type prev_dependent_properties: dict @@ -1924,11 +1909,10 @@ def _get_skewness( ): return - if self._greater_than_64_bit and type(df_series) is pd.Series: - df_series = df_series.to_numpy(dtype=float) + if self._greater_than_64_bit and type(df_series) is pl.Series: + batch_biased_skewness = profiler_utils.biased_skew(df_series.to_numpy()) else: - df_series = pl.from_pandas(df_series, nan_to_null=False) - batch_biased_skewness = profiler_utils.biased_skew(df_series) + batch_biased_skewness = profiler_utils.biased_skew(df_series) subset_properties["biased_skewness"] = batch_biased_skewness batch_count = subset_properties["match_count"] batch_biased_var = subset_properties["biased_variance"] @@ -1948,7 +1932,7 @@ def _get_skewness( @BaseColumnProfiler._timeit(name="kurtosis") def _get_kurtosis( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -1956,7 +1940,7 @@ def _get_kurtosis( Compute and update kurtosis of current dataset given new chunk. 
:param df_series: incoming data - :type df_series: pandas series + :type df_series: polars series :param prev_dependent_properties: pre-update values needed for computation :type prev_dependent_properties: dict @@ -1972,11 +1956,10 @@ def _get_kurtosis( ): return - if self._greater_than_64_bit and type(df_series) is pd.Series: - df_series = df_series.to_numpy(dtype=float) + if self._greater_than_64_bit: + batch_biased_kurtosis = profiler_utils.biased_kurt(df_series.to_numpy()) else: - df_series = pl.from_pandas(df_series, nan_to_null=False) - batch_biased_kurtosis = profiler_utils.biased_kurt(df_series) + batch_biased_kurtosis = profiler_utils.biased_kurt(df_series) subset_properties["biased_kurtosis"] = batch_biased_kurtosis batch_count = subset_properties["match_count"] batch_biased_var = subset_properties["biased_variance"] @@ -1999,7 +1982,7 @@ def _get_kurtosis( @BaseColumnProfiler._timeit(name="histogram_and_quantiles") def _get_histogram_and_quantiles( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -2017,7 +2000,7 @@ def _get_histogram_and_quantiles( @BaseColumnProfiler._timeit(name="num_zeros") def _get_num_zeros( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -2025,23 +2008,26 @@ def _get_num_zeros( Get the count of zeros in the numerical column. :param df_series: df series - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :param prev_dependent_properties: previous dependent properties :type prev_dependent_properties: dict :param subset_properties: subset of properties :type subset_properties: dict :return: None """ - if not self._greater_than_64_bit: - df_series = pl.from_pandas(df_series) - num_zeros_value = (df_series == 0).sum() + if df_series.is_empty(): + num_zeros_value = 0 + elif self._greater_than_64_bit: + num_zeros_value = int((df_series.to_numpy() == 0).sum()) + else: + num_zeros_value = int((df_series == 0).sum()) subset_properties["num_zeros"] = num_zeros_value self.num_zeros = self.num_zeros + num_zeros_value @BaseColumnProfiler._timeit(name="num_negatives") def _get_num_negatives( self, - df_series: pd.Series | np.ndarray, + df_series: pl.Series, prev_dependent_properties: dict, subset_properties: dict, ) -> None: @@ -2049,16 +2035,19 @@ def _get_num_negatives( Get the count of negative numbers in the numerical column. :param df_series: df series - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :param prev_dependent_properties: previous dependent properties :type prev_dependent_properties: dict :param subset_properties: subset of properties :type subset_properties: dict :return: None """ - if not self._greater_than_64_bit: - df_series = pl.from_pandas(df_series) - num_negatives_value = (df_series < 0).sum() + if df_series.is_empty(): + num_negatives_value = 0 + elif self._greater_than_64_bit: + num_negatives_value = int((df_series.to_numpy() < 0).sum()) + else: + num_negatives_value = int((df_series < 0).sum()) subset_properties["num_negatives"] = num_negatives_value self.num_negatives = self.num_negatives + num_negatives_value @@ -2068,7 +2057,7 @@ def update(self, df_series: pl.Series) -> NumericStatsMixin: Update the numerical profile properties with an uncleaned dataset. 
:param df_series: df series with nulls removed - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: None """ raise NotImplementedError() diff --git a/dataprofiler/profilers/text_column_profile.py b/dataprofiler/profilers/text_column_profile.py index f2ea321e..200bd5d3 100644 --- a/dataprofiler/profilers/text_column_profile.py +++ b/dataprofiler/profilers/text_column_profile.py @@ -141,7 +141,7 @@ def _update_vocab( Find the unique vocabulary used in the text column. :param data: list or array of data from which to extract vocab - :type data: Union[list, numpy.array, pandas.DataFrame] + :type data: Union[list, numpy.array, polars.DataFrame] :param prev_dependent_properties: Contains all the previous properties that the calculations depend on. :type prev_dependent_properties: dict @@ -158,16 +158,14 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None: Update col profile properties with clean dataset and its known null parameters. :param df_series_clean: df series with nulls removed - :type df_series_clean: pandas.core.series.Series + :type df_series_clean: polars.series.series.Series :param profile: text profile dictionary :type profile: dict :return: None """ if self._NumericStatsMixin__calculations: text_lengths = df_series_clean.str.len_chars() - NumericStatsMixin._update_helper( - self, text_lengths.drop_nulls().to_pandas(), profile - ) + NumericStatsMixin._update_helper(self, text_lengths.drop_nulls(), profile) self._update_column_base_properties(profile) if self.max: self.type = "string" if self.max <= 255 else "text" @@ -177,7 +175,7 @@ def update(self, df_series: pl.Series) -> TextColumn: Update the column profile. :param df_series: df series - :type df_series: pandas.core.series.Series + :type df_series: polars.series.series.Series :return: updated TextColumn :rtype: TextColumn """ diff --git a/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py b/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py index 7b4d2ccc..a1291276 100644 --- a/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py +++ b/dataprofiler/tests/profilers/test_numeric_stats_mixin_profile.py @@ -6,7 +6,7 @@ from unittest import mock import numpy as np -import pandas as pd +import polars as pl from dataprofiler.profilers import NumericStatsMixin from dataprofiler.profilers.base_column_profilers import BaseColumnProfiler @@ -340,7 +340,7 @@ def test_timeit(self): "biased_skewness": 0, } data = np.array([0, 0, 0, 0, 0]) - df_series = pd.Series(data) + df_series = pl.Series(data) subset_properties = {"min": 0, "match_count": 0} time_array = [float(i) for i in range(24, 0, -1)] @@ -547,21 +547,21 @@ def test_num_zeros(self): prev_dependent_properties = {"mean": 0} subset_properties = {"num_zeros": 0} - df_series = pd.Series([]) + df_series = pl.Series([]) num_profiler._get_num_zeros( df_series, prev_dependent_properties, subset_properties ) self.assertEqual(subset_properties["num_zeros"], 0) data = np.array([0, 0, 0, 0, 0]) - df_series = pd.Series(data) + df_series = pl.Series(data) num_profiler._get_num_zeros( df_series, prev_dependent_properties, subset_properties ) self.assertEqual(subset_properties["num_zeros"], 5) data = np.array([000.0, 0.00, 0.000, 1.11234, 0, -1]) - df_series = pd.Series(data) + df_series = pl.Series(data) num_profiler._get_num_zeros( df_series, prev_dependent_properties, subset_properties ) @@ -574,21 +574,21 @@ def test_num_negatives(self): prev_dependent_properties = {"mean": 0} 
         subset_properties = {"num_negatives": 0}

-        df_series = pd.Series([])
+        df_series = pl.Series([])
         num_profiler._get_num_negatives(
             df_series, prev_dependent_properties, subset_properties
         )
         self.assertEqual(subset_properties["num_negatives"], 0)

         data = np.array([0, 0, 0, 0, 0])
-        df_series = pd.Series(data)
+        df_series = pl.Series(data)
         num_profiler._get_num_negatives(
             df_series, prev_dependent_properties, subset_properties
         )
         self.assertEqual(subset_properties["num_negatives"], 0)

         data = np.array([1, 0, -0.003, -16, -1.0, -24.45])
-        df_series = pd.Series(data)
+        df_series = pl.Series(data)
         num_profiler._get_num_negatives(
             df_series, prev_dependent_properties, subset_properties
         )
@@ -675,7 +675,7 @@ def test_timeit_num_zeros_and_negatives(self):
         # Dummy data to make min call
         prev_dependent_properties = {"mean": 0}
         data = np.array([0, 0, 0, 0, 0])
-        df_series = pd.Series(data)
+        df_series = pl.Series(data)
         subset_properties = {"num_zeros": 0, "num_negatives": 0}

         time_array = [float(i) for i in range(4, 0, -1)]

From 30a4c24bcb336999dc861cdb5d9e76c4e764c241 Mon Sep 17 00:00:00 2001
From: Andrew Li
Date: Sun, 3 Mar 2024 20:00:52 -0600
Subject: [PATCH 11/11] fix pandas df in update

---
 dataprofiler/profilers/float_column_profile.py | 4 ++++
 dataprofiler/profilers/int_column_profile.py   | 4 ++++
 dataprofiler/profilers/text_column_profile.py  | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py
index 19bb19c6..61c222a1 100644
--- a/dataprofiler/profilers/float_column_profile.py
+++ b/dataprofiler/profilers/float_column_profile.py
@@ -4,6 +4,7 @@
 import copy

 import numpy as np
+import pandas as pd
 import polars as pl

 from . import profiler_utils
@@ -443,6 +444,9 @@ def update(self, df_series: pl.Series) -> FloatColumn:
         :return: updated FloatColumn
         :rtype: FloatColumn
         """
+        # TODO remove once profiler builder is updated
+        if isinstance(df_series, pd.Series):
+            df_series = pl.from_pandas(df_series)  # type: ignore
         if len(df_series) == 0:
             return self
         is_each_row_float = self._is_each_row_float(df_series).replace(None, False)
diff --git a/dataprofiler/profilers/int_column_profile.py b/dataprofiler/profilers/int_column_profile.py
index 15394ce8..15fb3fbd 100644
--- a/dataprofiler/profilers/int_column_profile.py
+++ b/dataprofiler/profilers/int_column_profile.py
@@ -2,6 +2,7 @@
 from __future__ import annotations

 import numpy as np
+import pandas as pd
 import polars as pl

 from .base_column_profilers import BaseColumnPrimitiveTypeProfiler, BaseColumnProfiler
@@ -157,6 +158,9 @@ def update(self, df_series: pl.Series) -> IntColumn:
         :return: updated IntColumn
         :rtype: IntColumn
         """
+        # TODO remove once profiler builder is updated
+        if isinstance(df_series, pd.Series):
+            df_series = pl.from_pandas(df_series)  # type: ignore
         self._greater_than_64_bit = df_series.dtype == pl.Object
         if len(df_series) == 0:
             return self
diff --git a/dataprofiler/profilers/text_column_profile.py b/dataprofiler/profilers/text_column_profile.py
index 200bd5d3..a44bc1b7 100644
--- a/dataprofiler/profilers/text_column_profile.py
+++ b/dataprofiler/profilers/text_column_profile.py
@@ -4,6 +4,7 @@
 import itertools

 import numpy as np
+import pandas as pd
 import polars as pl

 from . 
import profiler_utils
@@ -179,6 +180,9 @@ def update(self, df_series: pl.Series) -> TextColumn:
         :return: updated TextColumn
         :rtype: TextColumn
         """
+        # TODO remove once profiler builder is updated
+        if isinstance(df_series, pd.Series):
+            df_series = pl.from_pandas(df_series)  # type: ignore
         len_df = len(df_series)
         if len_df == 0:
             return self
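---

The notes below are illustrative sketches only, not part of any patch; they
assume a polars version compatible with the one this series targets.

The _get_num_zeros/_get_num_negatives rewrite in numerical_column_stats.py
counts matches natively in polars for 64-bit data and falls back to numpy only
on the object-typed (>64-bit) path. A minimal sketch of both branches:

    import polars as pl

    # 64-bit path: the boolean comparison and the sum stay in polars.
    s = pl.Series([0.0, 0.0, 1.11234, 0.0, -1.0])
    assert int((s == 0).sum()) == 3
    assert int((s < 0).sum()) == 1

    # >64-bit path: pl.Object series are compared through numpy instead,
    # since polars cannot compare arbitrary Python objects natively.
    big = pl.Series([2**70, 0, -(2**70)], dtype=pl.Object)
    assert int((big.to_numpy() == 0).sum()) == 1
    assert int((big.to_numpy() < 0).sum()) == 1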
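The text column update now derives string lengths with polars' str.len_chars(),
which counts characters rather than bytes and propagates nulls, so the nulls
are dropped before the numeric statistics update. A short sketch, assuming a
polars version that provides str.len_chars (as the patches themselves do):

    import polars as pl

    s = pl.Series(["abc", None, "héllo"])
    # Character count, not byte count; the null row stays null until dropped.
    lengths = s.str.len_chars().drop_nulls()
    assert lengths.to_list() == [3, 5]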
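PATCH 11 keeps update() callable while the profiler builder still hands over
pandas objects: any pandas Series is converted up front and the polars path
runs unchanged. A standalone sketch of the same guard (the coerce_to_polars
helper is hypothetical, introduced only for illustration):

    import pandas as pd
    import polars as pl

    def coerce_to_polars(df_series):
        # Hypothetical helper mirroring the TODO-guarded shim: convert
        # pandas input, pass polars input through untouched.
        if isinstance(df_series, pd.Series):
            df_series = pl.from_pandas(df_series)
        return df_series

    assert coerce_to_polars(pd.Series([1, 2, 3])).dtype == pl.Int64
    assert coerce_to_polars(pl.Series([1, 2, 3])).dtype == pl.Int64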