diff --git a/ibis/backends/datafusion/tests/test_udf.py b/ibis/backends/datafusion/tests/test_udf.py index db7430d0de93..a05f5d33a4cb 100644 --- a/ibis/backends/datafusion/tests/test_udf.py +++ b/ibis/backends/datafusion/tests/test_udf.py @@ -2,7 +2,6 @@ import pandas.testing as tm import pytest -from packaging.version import parse as vparse import ibis.expr.datatypes as dt import ibis.expr.types as ir @@ -69,10 +68,6 @@ def median(a: float) -> float: assert result == con.tables.batting.G.execute().median() -@pytest.mark.xfail( - condition=vparse(datafusion.__version__) >= vparse("38.0.1"), - reason="internal error about MEDIAN(G) naming", -) def test_builtin_agg_udf_filtered(con): @udf.agg.builtin def median(a: float, where: bool = True) -> float: diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index cdaa72d5fb6e..6d5dd3c1bce6 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -572,23 +572,7 @@ def test_reduction_ops( reason="risingwave requires an `order_by` for these aggregations", ) @pytest.mark.parametrize("method", ["first", "last"]) -@pytest.mark.parametrize( - "filtered", - [ - param( - False, - marks=[ - pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="datafusion 38.0.1 has a bug in FILTER handling that causes this test to fail", - strict=False, - ) - ], - ), - True, - ], -) +@pytest.mark.parametrize("filtered", [False, True]) @pytest.mark.parametrize( "include_null", [ @@ -662,23 +646,7 @@ def test_first_last(backend, alltypes, method, filtered, include_null): raises=com.OperationNotDefinedError, ) @pytest.mark.parametrize("method", ["first", "last"]) -@pytest.mark.parametrize( - "filtered", - [ - param( - False, - marks=[ - pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="datafusion 38.0.1 has a bug in FILTER handling that causes this test to fail", - strict=False, - ) - ], - ), - True, - ], -) +@pytest.mark.parametrize("filtered", [False, True]) @pytest.mark.parametrize( "include_null", [ @@ -966,11 +934,6 @@ def test_quantile( raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), - pytest.mark.xfail_version( - datafusion=["datafusion==38.0.1"], - reason="datafusion FILTER syntax seems broken", - strict=False, # passes with no filter condition - ), ], ), param( @@ -1369,22 +1332,7 @@ def test_group_concat_ordered(alltypes, df, filtered): ["clickhouse", "dask", "pandas", "pyspark", "flink"], raises=com.UnsupportedOperationError, ) -@pytest.mark.parametrize( - "filtered", - [ - param( - True, - marks=[ - pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="datafusion 38.0.1 has a bug in FILTER handling that causes this test to fail", - ) - ], - ), - False, - ], -) +@pytest.mark.parametrize("filtered", [True, False]) def test_collect_ordered(alltypes, df, filtered): ibis_cond = (_.id % 13 == 0) if filtered else None pd_cond = (df.id % 13 == 0) if filtered else True @@ -1407,22 +1355,7 @@ def test_collect_ordered(alltypes, df, filtered): @pytest.mark.notimpl( ["dask"], raises=AttributeError, reason="Dask doesn't implement tolist()" ) -@pytest.mark.parametrize( - "filtered", - [ - param( - True, - marks=[ - pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="datafusion 38.0.1 has a bug in FILTER handling that causes this test to fail", - ) - ], - ), - False, - ], -) +@pytest.mark.parametrize("filtered", [True, False]) @pytest.mark.parametrize( "include_null", [ diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index c2805fb105ca..c0f64fb8911c 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -303,11 +303,6 @@ def test_unnest_complex(backend): @builtin_array -@pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="Input field name ARRAY_AGG(t1.x) does not match with the projection expression", -) def test_unnest_idempotent(backend): array_types = backend.array_types df = array_types.execute() @@ -331,11 +326,6 @@ def test_unnest_idempotent(backend): @builtin_array -@pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="Input field name ARRAY_AGG(t1.x) does not match with the projection expression", -) def test_unnest_no_nulls(backend): array_types = backend.array_types df = array_types.execute() diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index b76ad6657068..6e0118458087 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1020,13 +1020,12 @@ def test_isin_notin(backend, alltypes, df, ibis_op, pandas_op): _.string_col.notin(_.string_col), lambda df: ~df.string_col.isin(df.string_col), id="notin_col", - marks=[pytest.mark.notimpl(["datafusion"])], ), param( (_.bigint_col + 1).notin(_.string_col.length() + 1), lambda df: ~(df.bigint_col.add(1)).isin(df.string_col.str.len().add(1)), id="notin_expr", - marks=[pytest.mark.notimpl(["datafusion", "druid"])], + marks=[pytest.mark.notimpl(["druid"])], ), ], ) @@ -1421,9 +1420,6 @@ def test_pivot_longer(backend): assert len(res.execute()) == len(expected) -@pytest.mark.xfail_version( - datafusion=["datafusion>=38.0.1"], reason="internal error about MEDIAN(G) naming" -) def test_pivot_wider(backend): diamonds = backend.diamonds expr = ( @@ -1484,10 +1480,6 @@ def test_pivot_wider(backend): raises=com.UnsupportedOperationError, reason="first/last requires an order_by", ) -@pytest.mark.notyet( - ["datafusion"], - reason="datafusion 38.0.1 has a bug in FILTER handling that causes this test to fail", -) def test_distinct_on_keep(backend, on, keep): from ibis import _ @@ -1549,10 +1541,6 @@ def test_distinct_on_keep(backend, on, keep): raises=com.UnsupportedOperationError, reason="first/last requires an order_by", ) -@pytest.mark.notyet( - ["datafusion"], - reason="datafusion 38.0.1 has a bug in FILTER handling that causes this test to fail", -) def test_distinct_on_keep_is_none(backend, on): from ibis import _ diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 3c37cb4234c8..8757175f6b60 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -115,7 +115,6 @@ def test_struct_column(alltypes, df): @pytest.mark.notimpl(["postgres", "risingwave", "polars"]) -@pytest.mark.notyet(["datafusion"], raises=Exception, reason="unsupported syntax") def test_collect_into_struct(alltypes): from ibis import _ diff --git a/ibis/backends/tests/tpc/ds/test_queries.py b/ibis/backends/tests/tpc/ds/test_queries.py index 44f5989e86dc..c6a557062430 100644 --- a/ibis/backends/tests/tpc/ds/test_queries.py +++ b/ibis/backends/tests/tpc/ds/test_queries.py @@ -50,7 +50,6 @@ def test_01(store_returns, date_dim, store, customer): @tpc_test("ds") -@pytest.mark.notimpl(["datafusion"], reason="internal error") def test_02(web_sales, catalog_sales, date_dim): wscs = web_sales.select( sold_date_sk=_.ws_sold_date_sk, sales_price=_.ws_ext_sales_price @@ -1353,6 +1352,7 @@ def test_26(catalog_sales, customer_demographics, date_dim, item, promotion): @tpc_test("ds") +@pytest.mark.notyet(["datafusion"], reason="Failed to plan") def test_27(store_sales, customer_demographics, date_dim, store, item): results = ( store_sales.join(customer_demographics, [("ss_cdemo_sk", "cd_demo_sk")]) @@ -2116,7 +2116,6 @@ def test_42(date_dim, store_sales, item): @tpc_test("ds") -@pytest.mark.notyet(["datafusion"], raises=Exception, reason="Internal error") def test_43(date_dim, store_sales, store): return ( date_dim.filter(_.d_year == 2000) @@ -2984,7 +2983,6 @@ def test_58(store_sales, item, date_dim, catalog_sales, web_sales): @tpc_test("ds") -@pytest.mark.notyet(["datafusion"], raises=Exception, reason="Internal error") def test_59(store_sales, date_dim, store): days = [(cal.day_abbr[i].lower(), cal.day_name[i]) for i in range(-1, 6)] diff --git a/ibis/backends/tests/tpc/h/test_queries.py b/ibis/backends/tests/tpc/h/test_queries.py index 2582698fe0e1..3291055cb7cb 100644 --- a/ibis/backends/tests/tpc/h/test_queries.py +++ b/ibis/backends/tests/tpc/h/test_queries.py @@ -498,11 +498,6 @@ def test_15(lineitem, supplier): return q.order_by([q.s_suppkey]) -@pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="IN subqueries not supported in DataFusion", -) @tpc_test("h") def test_16(partsupp, part, supplier): """Parts/Supplier Relationship Query (Q16) @@ -689,11 +684,6 @@ def test_20(supplier, nation, partsupp, part, lineitem): return q1.order_by(q1.s_name) -@pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="EXISTS subqueries not supported in DataFusion", -) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, @@ -745,11 +735,6 @@ def test_21(supplier, lineitem, orders, nation): return q.limit(100) -@pytest.mark.notyet( - ["datafusion"], - raises=Exception, - reason="EXISTS subqueries not supported in DataFusion", -) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, diff --git a/requirements-dev.txt b/requirements-dev.txt index 7828d40e6b20..5b2e755fd4d9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -43,7 +43,7 @@ crashtest==0.4.1 ; python_version >= "3.10" and python_version < "4.0" cryptography==42.0.8 ; python_version >= "3.10" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" dask[array,dataframe]==2024.2.1 ; python_version >= "3.10" and python_version < "4.0" -datafusion==39.0.0 ; python_version >= "3.10" and python_version < "4.0" +datafusion==40.1.0 ; python_version >= "3.10" and python_version < "4.0" db-dtypes==1.3.0 ; python_version >= "3.10" and python_version < "4.0" debugpy==1.8.5 ; python_version >= "3.10" and python_version < "3.13" decorator==5.1.1 ; python_version >= "3.10" and python_version < "4.0"