Upgrade to support Python 3.11

apache · May 10, 2023 · 61cfee5
1 parent 433dbca
commit 61cfee5
Show file tree

Hide file tree

Showing 25 changed files with 172 additions and 532 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -47,7 +47,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
         os: [macos-latest, windows-latest]
     steps:
       - uses: actions/checkout@v3
@@ -106,7 +106,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
     steps:
       - uses: actions/checkout@v3
 

diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           miniforge-variant: Mambaforge
           use-mamba: true
-          python-version: "3.10"
+          python-version: "3.11"
           channel-priority: strict
       - name: Install dependencies
         run: |

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
@@ -29,6 +29,6 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: "3.11"
       - name: Audit licenses
         run: ./dev/release/run-rat.sh .
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
@@ -35,21 +35,21 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: "3.11"
 
       - name: Install Protoc
         uses: arduino/setup-protoc@v1
         with:
-          version: '3.x'
+          version: "3.x"
           repo-token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Install dependencies
         run: |
           set -x
           python3 -m venv venv
           source venv/bin/activate
-          pip install -r requirements-310.txt
-          pip install -r docs/requirements.txt
+          pip install --require-hashes --no-deps -r requirements.txt
+          pip install --require-hashes --no-deps -r docs/requirements.txt
       - name: Build Datafusion
         run: |
           set -x

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -33,15 +33,13 @@ jobs:
       fail-fast: false
       matrix:
         python-version:
+          - "3.7"
+          - "3.8"
+          - "3.9"
           - "3.10"
+          - "3.11"
         toolchain:
           - "stable"
-          # we are not that much eager in walking on the edge yet
-          # - nightly
-        # build stable for only 3.7
-        include:
-          - python-version: "3.7"
-            toolchain: "stable"
     steps:
       - uses: actions/checkout@v3
 
@@ -55,7 +53,7 @@ jobs:
       - name: Install Protoc
         uses: arduino/setup-protoc@v1
         with:
-          version: '3.x'
+          version: "3.x"
           repo-token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Setup Python
@@ -71,34 +69,38 @@ jobs:
 
       - name: Check Formatting
         uses: actions-rs/cargo@v1
-        if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
+        if: ${{ matrix.python-version == '3.11' && matrix.toolchain == 'stable' }}
         with:
           command: fmt
           args: -- --check
 
       - name: Run Clippy
         uses: actions-rs/cargo@v1
-        if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
+        if: ${{ matrix.python-version == '3.11' && matrix.toolchain == 'stable' }}
         with:
           command: clippy
           args: --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure
 
-      - name: Create Virtualenv (3.10)
-        if: ${{ matrix.python-version == '3.10' }}
+      - name: Create Virtualenv (>= 3.8)
+        if: ${{ matrix.python-version != '3.7' }}
         run: |
           python -m venv venv
           source venv/bin/activate
-          pip install -r requirements-310.txt
+          pip install -U pip
+          # only required on versions < 3.11 because of Pytest 7
+          pip install 'exceptiongroup>=1.0.0rc8;python_version<"3.11"'
+          pip install --require-hashes --no-deps -r requirements.txt
 
       - name: Create Virtualenv (3.7)
         if: ${{ matrix.python-version == '3.7' }}
         run: |
           python -m venv venv
           source venv/bin/activate
-          pip install -r requirements-37.txt
+          pip install -U pip
+          pip install --require-hashes --no-deps -r requirements-37.txt
 
       - name: Run Python Linters
-        if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
+        if: ${{ matrix.python-version == '3.11' && matrix.toolchain == 'stable' }}
         run: |
           source venv/bin/activate
           flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503

diff --git a/README.md b/README.md
@@ -202,7 +202,7 @@ source venv/bin/activate
 # update pip itself if necessary
 python -m pip install -U pip
 # install dependencies (for Python 3.8+)
-python -m pip install -r requirements-310.txt
+python -m pip install --require-hashes --no-deps -r requirements.txt
 ```
 
 The tests rely on test data in git submodules.
@@ -235,13 +235,13 @@ To change test dependencies, change the `requirements.in` and run
 ```bash
 # install pip-tools (this can be done only once), also consider running in venv
 python -m pip install pip-tools
-python -m piptools compile --generate-hashes -o requirements-310.txt
+python -m piptools compile --generate-hashes -o requirements.txt
 ```
 
 To update dependencies, run with `-U`
 
 ```bash
-python -m piptools compile -U --generate-hashes -o requirements-310.txt
+python -m piptools compile -U --generate-hashes -o requirements.txt
 ```
 
 More details [here](https://github.com/jazzband/pip-tools)
diff --git a/datafusion/__init__.py b/datafusion/__init__.py
@@ -204,9 +204,7 @@ def udaf(accum, input_type, return_type, state_type, volatility, name=None):
     Create a new User Defined Aggregate Function
     """
     if not issubclass(accum, Accumulator):
-        raise TypeError(
-            "`accum` must implement the abstract base class Accumulator"
-        )
+        raise TypeError("`accum` must implement the abstract base class Accumulator")
     if name is None:
         name = accum.__qualname__.lower()
     return AggregateUDF(

diff --git a/datafusion/cudf.py b/datafusion/cudf.py
@@ -51,9 +51,7 @@ def to_cudf_df(self, plan):
         elif isinstance(node, TableScan):
             return cudf.read_parquet(self.parquet_tables[node.table_name()])
         else:
-            raise Exception(
-                "unsupported logical operator: {}".format(type(node))
-            )
+            raise Exception("unsupported logical operator: {}".format(type(node)))
 
     def sql(self, sql):
         datafusion_df = self.datafusion_ctx.sql(sql)

diff --git a/datafusion/pandas.py b/datafusion/pandas.py
@@ -51,9 +51,7 @@ def to_pandas_df(self, plan):
         elif isinstance(node, TableScan):
             return pd.read_parquet(self.parquet_tables[node.table_name()])
         else:
-            raise Exception(
-                "unsupported logical operator: {}".format(type(node))
-            )
+            raise Exception("unsupported logical operator: {}".format(type(node)))
 
     def sql(self, sql):
         datafusion_df = self.datafusion_ctx.sql(sql)

diff --git a/datafusion/polars.py b/datafusion/polars.py
@@ -50,9 +50,7 @@ def to_polars_df(self, plan):
             args = [self.to_polars_expr(expr) for expr in node.projections()]
             return inputs[0].select(*args)
         elif isinstance(node, Aggregate):
-            groupby_expr = [
-                self.to_polars_expr(expr) for expr in node.group_by_exprs()
-            ]
+            groupby_expr = [self.to_polars_expr(expr) for expr in node.group_by_exprs()]
             aggs = []
             for expr in node.aggregate_exprs():
                 expr = expr.to_variant()
@@ -66,17 +64,13 @@ def to_polars_df(self, plan):
                             )
                         )
                 else:
-                    raise Exception(
-                        "Unsupported aggregate function {}".format(expr)
-                    )
+                    raise Exception("Unsupported aggregate function {}".format(expr))
             df = inputs[0].groupby(groupby_expr).agg(aggs)
             return df
         elif isinstance(node, TableScan):
             return polars.read_parquet(self.parquet_tables[node.table_name()])
         else:
-            raise Exception(
-                "unsupported logical operator: {}".format(type(node))
-            )
+            raise Exception("unsupported logical operator: {}".format(type(node)))
 
     def sql(self, sql):
         datafusion_df = self.datafusion_ctx.sql(sql)

diff --git a/datafusion/tests/generic.py b/datafusion/tests/generic.py
@@ -50,9 +50,7 @@ def data_datetime(f):
         datetime.datetime.now() - datetime.timedelta(days=1),
         datetime.datetime.now() + datetime.timedelta(days=1),
     ]
-    return pa.array(
-        data, type=pa.timestamp(f), mask=np.array([False, True, False])
-    )
+    return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False]))
 
 
 def data_date32():
@@ -61,9 +59,7 @@ def data_date32():
         datetime.date(1980, 1, 1),
         datetime.date(2030, 1, 1),
     ]
-    return pa.array(
-        data, type=pa.date32(), mask=np.array([False, True, False])
-    )
+    return pa.array(data, type=pa.date32(), mask=np.array([False, True, False]))
 
 
 def data_timedelta(f):
@@ -72,9 +68,7 @@ def data_timedelta(f):
         datetime.timedelta(days=1),
         datetime.timedelta(seconds=1),
     ]
-    return pa.array(
-        data, type=pa.duration(f), mask=np.array([False, True, False])
-    )
+    return pa.array(data, type=pa.duration(f), mask=np.array([False, True, False]))
 
 
 def data_binary_other():

diff --git a/datafusion/tests/test_aggregation.py b/datafusion/tests/test_aggregation.py
@@ -80,9 +80,7 @@ def test_built_in_aggregation(df):
     assert result.column(2) == pa.array([4])
     assert result.column(3) == pa.array([6])
     assert result.column(4) == pa.array([[4, 4, 6]])
-    np.testing.assert_array_almost_equal(
-        result.column(5), np.average(values_a)
-    )
+    np.testing.assert_array_almost_equal(result.column(5), np.average(values_a))
     np.testing.assert_array_almost_equal(
         result.column(6), np.corrcoef(values_a, values_b)[0][1]
     )
@@ -100,28 +98,14 @@ def test_built_in_aggregation(df):
     )
     np.testing.assert_array_almost_equal(result.column(11), np.max(values_a))
     np.testing.assert_array_almost_equal(result.column(12), np.mean(values_b))
-    np.testing.assert_array_almost_equal(
-        result.column(13), np.median(values_b)
-    )
+    np.testing.assert_array_almost_equal(result.column(13), np.median(values_b))
     np.testing.assert_array_almost_equal(result.column(14), np.min(values_a))
     np.testing.assert_array_almost_equal(
         result.column(15), np.sum(values_b.to_pylist())
     )
-    np.testing.assert_array_almost_equal(
-        result.column(16), np.std(values_a, ddof=1)
-    )
-    np.testing.assert_array_almost_equal(
-        result.column(17), np.std(values_b, ddof=0)
-    )
-    np.testing.assert_array_almost_equal(
-        result.column(18), np.std(values_c, ddof=1)
-    )
-    np.testing.assert_array_almost_equal(
-        result.column(19), np.var(values_a, ddof=1)
-    )
-    np.testing.assert_array_almost_equal(
-        result.column(20), np.var(values_b, ddof=0)
-    )
-    np.testing.assert_array_almost_equal(
-        result.column(21), np.var(values_c, ddof=1)
-    )
+    np.testing.assert_array_almost_equal(result.column(16), np.std(values_a, ddof=1))
+    np.testing.assert_array_almost_equal(result.column(17), np.std(values_b, ddof=0))
+    np.testing.assert_array_almost_equal(result.column(18), np.std(values_c, ddof=1))
+    np.testing.assert_array_almost_equal(result.column(19), np.var(values_a, ddof=1))
+    np.testing.assert_array_almost_equal(result.column(20), np.var(values_b, ddof=0))
+    np.testing.assert_array_almost_equal(result.column(21), np.var(values_c, ddof=1))
diff --git a/datafusion/tests/test_config.py b/datafusion/tests/test_config.py
@@ -35,10 +35,7 @@ def test_get_then_set(config):
 
 def test_get_all(config):
     config_dict = config.get_all()
-    assert (
-        config_dict["datafusion.catalog.create_default_catalog_and_schema"]
-        == "true"
-    )
+    assert config_dict["datafusion.catalog.create_default_catalog_and_schema"] == "true"
 
 
 def test_get_invalid_config(config):

diff --git a/datafusion/tests/test_context.py b/datafusion/tests/test_context.py
@@ -36,9 +36,7 @@ def test_create_context_no_args():
 
 
 def test_create_context_with_all_valid_args():
-    runtime = (
-        RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
-    )
+    runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
     config = (
         SessionConfig()
         .with_create_default_catalog_and_schema(True)