Commit 4a1216f

SEOD-1326. Bump fireant to pandas version 2 (#366)
* SEOD-1326. Bump fireant to pandas version 2

- Bump fireant version to 8.0.0
- Drop support for Python 3.7, as pandas v2 does not support it
- Add Python 3.9 support
- Use the newest `pandas` (2.0.3 in requirements) and add the minimum version 2.0.0 of `pandas` to pyproject.toml
- Use the newest `vertica-python` (1.3.4 in requirements) and add the minimum version 1.0.0 of `vertica-python` to pyproject.toml
- Use the newest `snowflake-connector-python` (3.0.4 in requirements) and add the minimum version 3.0.0 of `snowflake-connector-python` to pyproject.toml
- Use the newest `coverage` (7.3.0 in requirements) and add the minimum version 7.3.0 of `coverage` to pyproject.toml
- Use the newest `watchdog` (3.0.0 in requirements) and add the minimum version 3.0.0 of `watchdog` to pyproject.toml
- Remove `python-dateutil` from dependencies, as it is already pulled in by other libraries' dependencies
- Bump `psycopg-binary==2.9.6`, though it does not seem to be needed for the tests
- Bump `pymssql==2.2.7`, though it does not seem to be needed for the tests
- Bump `Cython==3.0.0`, though it does not seem to be needed for the tests
- Get rid of `SyntaxWarning: "is" with a literal. Did you mean "=="?`
- Get rid of `DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working`
- Replace `DataFrame.append` with `pd.concat`, because `append` no longer exists as of pandas v2 (see the migration sketch below)
- Pass `group_keys=False` to `DataFrame.groupby()`, because the parameter is no longer ignored as of pandas v1.5
- Fix the `_apply_share` method for `Share`s with the new library versions
- Rename `TestDatabase` to `MockDatabase`, since it is only used for mocking; this also keeps the test runner from collecting it when looking for tests to run
- Rename `test_connect` and `test_fetch` to `mock_connect` and `mock_fetch`, as these are mocks; this also keeps the test runner from collecting them
- Rename `TestMySQLDatabase` to `MockMySQLDatabase` for the same reason
- When concatenating `DataFrame`s, use `.tail(1)` instead of `.iloc[-1]`, since `.tail(1)` keeps the index (illustrated after the `test_execution.py` diff below)
- Load the expected `DataFrame`s in tests from static CSVs instead of deriving them by applying `fireant` methods to a `DataFrame`
- Replace `np.float` with `float`, since it was deprecated
- Get rid of `None` and `[]` as `ascending` parameters for the `Pandas` class
- Replace `.iteritems()` with `.items()`, as the former was deprecated
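A minimal sketch of the main pandas API migrations applied in this commit, on an illustrative DataFrame (the column names and data below are made up, not fireant fixtures):

```python
import pandas as pd

df = pd.DataFrame({"party": ["d", "d", "r"], "votes": [10, 20, 5]})
extra = pd.DataFrame({"party": ["r"], "votes": [7]})

# DataFrame.append was removed in pandas 2; concatenate explicitly instead.
combined = pd.concat([df, extra], ignore_index=True)

# group_keys is no longer ignored by groupby().apply() since pandas 1.5, so pass it
# explicitly wherever the old behaviour (no extra index level) is what the code expects.
share = combined.groupby("party", group_keys=False)["votes"].apply(lambda s: s / s.sum())

# .iteritems() was removed; iterate over columns (or Series items) with .items().
for column, values in combined.items():
    print(column, values.tolist())

# np.float was deprecated and later removed from NumPy; the builtin float works in its place.
combined["votes"] = combined["votes"].astype(float)
```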
1 parent f10a5c0 commit 4a1216f

11 files changed, +57 -36 lines changed


CHANGELOG.md

Lines changed: 3 additions & 2 deletions
@@ -1,7 +1,8 @@
 2023 July
 
-#### [7.10.0] - 2023-08-29
-- Bump fireant version to 7.10.0
+
+#### [8.0.0] - 2023-08-29
+- Bump fireant version to 8.0.0
 - Drop support for Python3.7 as pandas v2 does not support it
 - Add Python3.9 support
 - Use the newest `pandas` (2.0.3 in requirements) and add the minimum version 2.0.0 of `pandas` to pyproject.toml

fireant/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -55,4 +55,4 @@ def __hash__(self) -> int:
 Term.__hash__ = __hash__
 
 
-__version__ = "7.9.0"
+__version__ = "8.0.0"

fireant/database/base.py

Lines changed: 6 additions & 2 deletions
@@ -195,7 +195,11 @@ def make_slicer_query_with_totals_and_references(
         )
 
         for reference_parts, references in reference_groups_and_none:
-            (dimensions_with_ref, metrics_with_ref, filters_with_ref,) = self.adapt_for_reference_query(
+            (
+                dimensions_with_ref,
+                metrics_with_ref,
+                filters_with_ref,
+            ) = self.adapt_for_reference_query(
                 reference_parts,
                 dimensions_with_totals,
                 metrics,
@@ -301,7 +305,7 @@ def make_slicer_query(
         # In the case that the orders are determined by a field that is not selected as a metric or dimension, then it needs
         # to be added to the query.
         select_aliases = {el.alias for el in query._selects}
-        for (orderby_field, orientation) in orders:
+        for orderby_field, orientation in orders:
             orderby_term = self.transform_field_to_query(orderby_field)
             query = query.orderby(orderby_term, order=orientation)

fireant/queries/builder/dataset_blender_query_builder.py

Lines changed: 2 additions & 3 deletions
@@ -243,7 +243,7 @@ def _deepcopy_recursive(node):
     if hasattr(node, '_cases'):
         cloned_cases = []
 
-        for (criterion, value) in cloned_node._cases:
+        for criterion, value in cloned_node._cases:
            cloned_cases.append((_deepcopy_recursive(criterion), _deepcopy_recursive(value)))
 
         cloned_node._cases = cloned_cases
@@ -423,7 +423,6 @@ def sql(self):
         # First determine the metrics. If a a metric is requested, and the dataset has it, add it for that dataset.
         # We include metrics used in filters. We also save for each dataset the mapped metrics and filters
         for dataset_index, dataset in enumerate(datasets):
-
             dataset_metrics.append(
                 map_blender_fields_to_dataset_fields(
                     selected_metrics_as_dataset_fields,
@@ -478,7 +477,7 @@ def sql(self):
         for dimension_dataset_info in dimensions_dataset_info:
             dimension_accounted_for = False
             first_dataset_that_has_the_dimension = None
-            for (dataset_index, mapped_dimension, is_selected_dimension) in dimension_dataset_info:
+            for dataset_index, mapped_dimension, is_selected_dimension in dimension_dataset_info:
                 # If the dataset is already part of the final query, add this dimension
                 if dataset_included_in_final_query[dataset_index]:
                     dimension_accounted_for = True

fireant/tests/dataset/mocks.py

Lines changed: 13 additions & 3 deletions
@@ -728,13 +728,19 @@ def _totals(df):
 
 dimx2_date_str_totalsx2_share_over_first_series = pd.read_csv(
     os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx2_date_str_totalsx2_share_over_first_df.csv",
-    index_col=['$timestamp', '$political_party',],
+    index_col=[
+        '$timestamp',
+        '$political_party',
+    ],
     parse_dates=['$timestamp'],
 ).squeeze()
 
 dimx2_date_str_totalsx2_share_over_second_series = pd.read_csv(
     os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx2_date_str_totalsx2_share_over_second_df.csv",
-    index_col=['$timestamp', '$political_party',],
+    index_col=[
+        '$timestamp',
+        '$political_party',
+    ],
     parse_dates=['$timestamp'],
 ).squeeze()
 
@@ -744,7 +750,11 @@ def _totals(df):
 
 dimx3_date_str_str_totals_df = pd.read_csv(
     os.path.dirname(os.path.realpath(__file__)) + "/mocks/dimx3_date_str_str_totals_df.csv",
-    index_col=['$timestamp', '$political_party', '$state',],
+    index_col=[
+        '$timestamp',
+        '$political_party',
+        '$state',
+    ],
     parse_dates=['$timestamp'],
 )

fireant/tests/dataset/test_execution.py

Lines changed: 21 additions & 18 deletions
@@ -288,10 +288,12 @@ def test_reduce_single_result_set_with_str_dimension(self):
         pandas.testing.assert_frame_equal(expected, result)
 
     def test_reduce_single_result_set_with_dimx2_date_str_totals_date(self):
-        expected = pd.concat([
-            dimx2_date_str_totalsx2_df.loc[(slice(None), slice("Democrat", "Republican")), :],
-            dimx2_date_str_totalsx2_df.tail(1),
-        ])
+        expected = pd.concat(
+            [
+                dimx2_date_str_totalsx2_df.loc[(slice(None), slice("Democrat", "Republican")), :],
+                dimx2_date_str_totalsx2_df.tail(1),
+            ]
+        )
 
         raw_df = replace_totals(dimx2_date_str_df)
         totals_df = pd.merge(
@@ -351,10 +353,12 @@ def test_reduce_single_result_set_with_dimx2_date_str_str_totals_date(self):
         pandas.testing.assert_frame_equal(expected, result)
 
     def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals(self):
-        expected = pd.concat([
-            dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("California", "Texas")), :],
-            dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[:-1],
-        ]).sort_index()
+        expected = pd.concat(
+            [
+                dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("California", "Texas")), :],
+                dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[:-1],
+            ]
+        ).sort_index()
 
         raw_df = replace_totals(dimx3_date_str_str_df)
         totals_df = raw_df.groupby("$timestamp").sum().reset_index()
@@ -408,18 +412,17 @@ def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals_with_
         nulls_totals[index_names[1]] = "~~totals"
         nulls_totals[index_names[2]] = "~~totals"
 
-        expected = pd.concat([
-            dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("1", "2")), :],
-            dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].tail(1),
-            nulls.set_index(index_names),
-            nulls_totals.set_index(index_names),
-        ]).sort_index()
+        expected = pd.concat(
+            [
+                dimx3_date_str_str_totalsx3_df.loc[(slice(None), slice(None), slice("1", "2")), :],
+                dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].tail(1),
+                nulls.set_index(index_names),
+                nulls_totals.set_index(index_names),
+            ]
+        ).sort_index()
 
         raw_df = replace_totals(dimx3_date_str_str_df)
-        raw_df = pd.concat([
-            nulls,
-            raw_df
-        ]).sort_values(["$timestamp", "$political_party", "$state"])
+        raw_df = pd.concat([nulls, raw_df]).sort_values(["$timestamp", "$political_party", "$state"])
 
         totals_df = raw_df.groupby("$timestamp").sum().reset_index()
         null_totals_df = pd.DataFrame([raw_df[raw_df["$timestamp"].isnull()][metrics].sum()])
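
The hunks above also show why the expected frames are now assembled with `.tail(1)` rather than `.iloc[-1]`: `.tail(1)` returns a one-row DataFrame that keeps its (Multi)Index, whereas `.iloc[-1]` returns a Series and loses it. A minimal sketch with an illustrative frame (not the real test fixtures):

```python
import pandas as pd

df = pd.DataFrame(
    {"$votes": [10, 20, 5]},
    index=pd.MultiIndex.from_tuples(
        [
            ("2020-01-01", "Democrat"),
            ("2020-01-01", "Republican"),
            ("2020-01-01", "~~totals"),
        ],
        names=["$timestamp", "$political_party"],
    ),
)

last_as_series = df.iloc[-1]  # Series: the row's index labels collapse into the Series name
last_as_frame = df.tail(1)    # one-row DataFrame that still carries the MultiIndex

# An expected frame assembled from slices therefore keeps a consistent index:
expected = pd.concat(
    [
        df.loc[(slice(None), slice("Democrat", "Republican")), :],
        df.tail(1),
    ]
)
```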

fireant/tests/dataset/test_filter_totals_from_share_results.py

Lines changed: 6 additions & 4 deletions
@@ -82,10 +82,12 @@ def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_higher_d
             dimx2_date_str_totalsx2_df, [Rollup(mock_dataset.fields.timestamp), mock_dataset.fields.political_party]
         )
 
-        expected = pd.concat([
-            dimx2_date_str_totalsx2_df.loc[(slice(None), slice('Democrat', 'Republican')), :],
-            dimx2_date_str_totalsx2_df.tail(1),
-        ])
+        expected = pd.concat(
+            [
+                dimx2_date_str_totalsx2_df.loc[(slice(None), slice('Democrat', 'Republican')), :],
+                dimx2_date_str_totalsx2_df.tail(1),
+            ]
+        )
 
         pandas.testing.assert_frame_equal(result, expected)

fireant/tests/queries/test_build_sets.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@
     ],
 )
 
+
 # noinspection SqlDialectInspection,SqlNoDataSourceInspection
 class ResultSetTests(TestCase):
     maxDiff = None

fireant/tests/widgets/test_matplotlib.py

Lines changed: 0 additions & 1 deletion
@@ -26,6 +26,5 @@ def test_single_metric_line_chart(self):
 
             self.assertEqual(1, len(result))
 
-
     except ImportError:
         pass

fireant/tests/widgets/test_pandas.py

Lines changed: 3 additions & 1 deletion
@@ -573,7 +573,9 @@ def test_use_first_value_for_ascending_when_arg_has_invalid_length(self):
 
     def test_use_pandas_default_for_ascending_when_arg_empty_list(self):
         result = Pandas(
-            mock_dataset.fields.votes, pivot=[mock_dataset.fields.political_party], sort=[0, 2],
+            mock_dataset.fields.votes,
+            pivot=[mock_dataset.fields.political_party],
+            sort=[0, 2],
         ).transform(dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], [])
 
         expected = dimx2_date_str_df.copy()[[f('votes')]]

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fireant"
-version = "7.10.0"
+version = "8.0.0"
 description = ""
 authors = ["Ąžuolas Krušna <akrusna@kayak.com>"]
 readme = "README.rst"
