[pre-commit.ci] pre-commit autoupdate #818

Merged: 3 commits, Sep 7, 2024
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -1,15 +1,15 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: 'v0.5.7'
+  rev: 'v0.6.3'
   hooks:
   # Run the formatter.
   - id: ruff-format
   # Run the linter.
   - id: ruff
     args: [--fix]
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: 'v1.11.1'
+  rev: 'v1.11.2'
   hooks:
   - id: mypy
     additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2']
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -78,6 +78,15 @@ lint.ignore = [
 "tests/*" = ["S101"]
 "utils/*" = ["S311", "PTH123"]
 "tpch/execute/*" = ["T201"]
+"tpch/notebooks/*" = [
+    "ANN001",
+    "ANN201",
+    "EM101",
+    "EXE002",
+    "PTH123",
+    "T203",
+    "TRY003",
+]
 
 [tool.ruff.lint.pydocstyle]
 convention = "google"
4 changes: 2 additions & 2 deletions tests/expr_and_series/arithmetic_test.py
@@ -149,7 +149,7 @@ def test_truediv_same_dims(constructor_eager: Any, request: Any) -> None:
     compare_dicts({"a": result}, {"a": [2, 1, 1 / 3]})


-@pytest.mark.slow()
+@pytest.mark.slow
 @given(  # type: ignore[misc]
     left=st.integers(-100, 100),
     right=st.integers(-100, 100),
@@ -189,7 +189,7 @@ def test_floordiv(left: int, right: int) -> None:
     compare_dicts(result, expected)


-@pytest.mark.slow()
+@pytest.mark.slow
 @given(  # type: ignore[misc]
     left=st.integers(-100, 100),
     right=st.integers(-100, 100),
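Note: dropping the parentheses from `@pytest.mark.slow()` matches the parenthesis-free style for argument-less marks that ruff's flake8-pytest-style rules enforce by default (rule PT023). Bare marks still need to be registered so pytest does not emit PytestUnknownMarkWarning; a minimal sketch, assuming the marker is registered in a conftest.py (the repository may register it in pyproject.toml instead):

```python
# conftest.py -- hypothetical sketch: register the "slow" marker so that
# a bare @pytest.mark.slow does not trigger PytestUnknownMarkWarning.
def pytest_configure(config):
    config.addinivalue_line("markers", "slow: marks a test as slow to run")
```

Slow tests can then be deselected with `pytest -m "not slow"`.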
2 changes: 1 addition & 1 deletion tests/expr_and_series/dt/ordinal_day_test.py
@@ -17,7 +17,7 @@
     parse_version(pd.__version__) < parse_version("2.0.0"),
     reason="pyarrow dtype not available",
 )
-@pytest.mark.slow()
+@pytest.mark.slow
 def test_ordinal_day(dates: datetime) -> None:
     result_pd = nw.from_native(pd.Series([dates]), series_only=True).dt.ordinal_day()[0]
     result_pdms = nw.from_native(
2 changes: 1 addition & 1 deletion tests/expr_and_series/dt/total_minutes_test.py
@@ -22,7 +22,7 @@
     parse_version(pd.__version__) < parse_version("2.2.0"),
     reason="pyarrow dtype not available",
 )
-@pytest.mark.slow()
+@pytest.mark.slow
 def test_total_minutes(timedeltas: timedelta) -> None:
     result_pd = nw.from_native(
         pd.Series([timedeltas]), series_only=True
2 changes: 1 addition & 1 deletion tests/hypothesis/test_basic_arithmetic.py
@@ -22,7 +22,7 @@
         max_size=3,
     ),
 )  # type: ignore[misc]
-@pytest.mark.slow()
+@pytest.mark.slow
 def test_mean(
     integer: st.SearchStrategy[list[int]],
     floats: st.SearchStrategy[float],
2 changes: 1 addition & 1 deletion tests/hypothesis/test_concat.py
@@ -31,7 +31,7 @@
     ),
     how=st.sampled_from(["horizontal", "vertical"]),
 )  # type: ignore[misc]
-@pytest.mark.slow()
+@pytest.mark.slow
 @pytest.mark.skipif(is_windows(), reason="pyarrow breaking on windows")
 def test_concat(  # pragma: no cover
     integers: list[int],
6 changes: 3 additions & 3 deletions tests/hypothesis/test_join.py
@@ -42,7 +42,7 @@
 )  # type: ignore[misc]
 @pytest.mark.skipif(pl_version < parse_version("0.20.13"), reason="0.0 == -0.0")
 @pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow")
-@pytest.mark.slow()
+@pytest.mark.slow
 def test_join(  # pragma: no cover
     integers: st.SearchStrategy[list[int]],
     other_integers: st.SearchStrategy[list[int]],
@@ -88,7 +88,7 @@ def test_join(  # pragma: no cover
         max_size=3,
     ),
 )  # type: ignore[misc]
-@pytest.mark.slow()
+@pytest.mark.slow
 @pytest.mark.skipif(pd_version < parse_version("2.0.0"), reason="requires pyarrow")
 def test_cross_join(  # pragma: no cover
     integers: st.SearchStrategy[list[int]],
@@ -135,7 +135,7 @@ def test_cross_join(  # pragma: no cover
         st.sampled_from(["a", "b", "d"]), min_size=1, max_size=3, unique=True
     ),
 )
-@pytest.mark.slow()
+@pytest.mark.slow
 @pytest.mark.filterwarnings("ignore:the default coalesce behavior")
 def test_left_join(  # pragma: no cover
     a_left_data: list[int],
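The same `@pytest.mark.slow` change applies to each hypothesis-based test above. For readers unfamiliar with the pattern, a minimal sketch of how a bare mark combines with `@given` (the test name and property below are hypothetical, not taken from this PR):

```python
import pytest
from hypothesis import given
from hypothesis import strategies as st


@pytest.mark.slow
@given(left=st.integers(-100, 100), right=st.integers(-100, 100))
def test_addition_commutes(left: int, right: int) -> None:
    # Hypothesis drives the test body with many generated (left, right) pairs.
    assert left + right == right + left
```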
53 changes: 29 additions & 24 deletions tpch/notebooks/q1/execute.ipynb
@@ -58,10 +58,12 @@
 },
 "outputs": [],
 "source": [
-    "from typing import Any\n",
     "from datetime import datetime\n",
+    "from typing import Any\n",
     "\n",
     "import narwhals as nw\n",
+    "\n",
+    "\n",
     "@nw.narwhalify\n",
     "def q1(lineitem_ds: Any) -> Any:\n",
     "    var_1 = datetime(1998, 9, 2)\n",
@@ -107,14 +109,14 @@
 "outputs": [],
 "source": [
     "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n",
-    "region = dir_ + 'region.parquet'\n",
-    "nation = dir_ + 'nation.parquet'\n",
-    "customer = dir_ + 'customer.parquet'\n",
-    "lineitem = dir_ + 'lineitem.parquet'\n",
-    "orders = dir_ + 'orders.parquet'\n",
-    "supplier = dir_ + 'supplier.parquet'\n",
-    "part = dir_ + 'part.parquet'\n",
-    "partsupp = dir_ + 'partsupp.parquet'"
+    "region = dir_ + \"region.parquet\"\n",
+    "nation = dir_ + \"nation.parquet\"\n",
+    "customer = dir_ + \"customer.parquet\"\n",
+    "lineitem = dir_ + \"lineitem.parquet\"\n",
+    "orders = dir_ + \"orders.parquet\"\n",
+    "supplier = dir_ + \"supplier.parquet\"\n",
+    "part = dir_ + \"part.parquet\"\n",
+    "partsupp = dir_ + \"partsupp.parquet\""
 ]
 },
 {
@@ -133,16 +135,18 @@
 },
 "outputs": [],
 "source": [
-    "import pyarrow.parquet as pq\n",
     "import dask.dataframe as dd\n",
+    "import pyarrow.parquet as pq\n",
     "\n",
     "IO_FUNCS = {\n",
-    "    'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n",
-    "    'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n",
-    "    'polars[eager]': lambda x: pl.read_parquet(x),\n",
-    "    'polars[lazy]': lambda x: pl.scan_parquet(x),\n",
-    "    'pyarrow': lambda x: pq.read_table(x),\n",
-    "    'dask': lambda x: dd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n",
+    "    \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n",
+    "    \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n",
+    "        x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n",
+    "    ),\n",
+    "    \"polars[eager]\": lambda x: pl.read_parquet(x),\n",
+    "    \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n",
+    "    \"pyarrow\": lambda x: pq.read_table(x),\n",
+    "    \"dask\": lambda x: dd.read_parquet(x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"),\n",
     "}"
 ]
 },
@@ -171,7 +175,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-    "tool = 'pyarrow'\n",
+    "tool = \"pyarrow\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q1(fn(lineitem))\n",
     "results[tool] = timings.all_runs"
@@ -210,7 +214,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'pandas'\n",
+    "tool = \"pandas\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q1(lineitem_ds=fn(lineitem))\n",
     "results[tool] = timings.all_runs"
@@ -249,7 +253,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'pandas[pyarrow]'\n",
+    "tool = \"pandas[pyarrow]\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q1(fn(lineitem))\n",
     "results[tool] = timings.all_runs"
@@ -288,7 +292,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'polars[eager]'\n",
+    "tool = \"polars[eager]\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q1(fn(lineitem))\n",
     "results[tool] = timings.all_runs"
@@ -327,7 +331,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'polars[lazy]'\n",
+    "tool = \"polars[lazy]\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q1(fn(lineitem)).collect()\n",
     "results[tool] = timings.all_runs"
@@ -348,7 +352,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-    "tool = 'dask'\n",
+    "tool = \"dask\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q1(fn(lineitem)).collect()\n",
     "results[tool] = timings.all_runs"
@@ -370,8 +374,9 @@
 "outputs": [],
 "source": [
     "import json\n",
-    "with open('results.json', 'w') as fd:\n",
-    "    json.dump(results, fd)\n"
+    "\n",
+    "with open(\"results.json\", \"w\") as fd:\n",
+    "    json.dump(results, fd)"
 ]
 }
 ],
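For context, the q1 notebook relies on `@nw.narwhalify` (visible in the first hunk above), which converts whatever native frame is passed in (pandas, Polars eager or lazy, PyArrow, Dask) into a narwhals frame and converts the result back. A minimal sketch of the pattern, using hypothetical column names rather than the TPC-H schema:

```python
from typing import Any

import narwhals as nw


@nw.narwhalify
def total_revenue(df: Any) -> Any:
    # `df` arrives as a narwhals frame regardless of the native backend;
    # the return value is converted back to that backend automatically.
    return df.with_columns(
        (nw.col("price") * (1 - nw.col("discount"))).alias("revenue")
    ).select(nw.col("revenue").sum())
```

This is why the same `q1` function can be timed unchanged against every backend in `IO_FUNCS`.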
41 changes: 22 additions & 19 deletions tpch/notebooks/q10/execute.ipynb
@@ -55,22 +55,23 @@
 },
 "outputs": [],
 "source": [
-    "from typing import Any\n",
     "from datetime import datetime\n",
+    "from typing import Any\n",
     "\n",
     "import narwhals as nw\n",
+    "\n",
+    "\n",
     "def q10(\n",
     "    customer_ds_raw: Any,\n",
     "    nation_ds_raw: Any,\n",
     "    lineitem_ds_raw: Any,\n",
     "    orders_ds_raw: Any,\n",
     ") -> Any:\n",
-    "\n",
     "    nation_ds = nw.from_native(nation_ds_raw)\n",
     "    line_item_ds = nw.from_native(lineitem_ds_raw)\n",
     "    orders_ds = nw.from_native(orders_ds_raw)\n",
     "    customer_ds = nw.from_native(customer_ds_raw)\n",
-    "    \n",
+    "\n",
     "    var1 = datetime(1993, 10, 1)\n",
     "    var2 = datetime(1994, 1, 1)\n",
     "\n",
@@ -81,8 +82,7 @@
     "        .filter(nw.col(\"o_orderdate\").is_between(var1, var2, closed=\"left\"))\n",
     "        .filter(nw.col(\"l_returnflag\") == \"R\")\n",
     "        .with_columns(\n",
-    "            (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\")))\n",
-    "            .alias(\"revenue\")\n",
+    "            (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\"revenue\")\n",
     "        )\n",
     "        .group_by(\n",
     "            \"c_custkey\",\n",
@@ -127,10 +127,10 @@
 "outputs": [],
 "source": [
     "dir_ = \"/kaggle/input/tpc-h-data-parquet-s-2/\"\n",
-    "nation = dir_ + 'nation.parquet'\n",
-    "lineitem = dir_ + 'lineitem.parquet'\n",
-    "orders = dir_ + 'orders.parquet'\n",
-    "customer = dir_ + 'customer.parquet'"
+    "nation = dir_ + \"nation.parquet\"\n",
+    "lineitem = dir_ + \"lineitem.parquet\"\n",
+    "orders = dir_ + \"orders.parquet\"\n",
+    "customer = dir_ + \"customer.parquet\""
 ]
 },
 {
@@ -149,10 +149,12 @@
 "outputs": [],
 "source": [
     "IO_FUNCS = {\n",
-    "    'pandas': lambda x: pd.read_parquet(x, engine='pyarrow'),\n",
-    "    'pandas[pyarrow]': lambda x: pd.read_parquet(x, engine='pyarrow', dtype_backend='pyarrow'),\n",
-    "    'polars[eager]': lambda x: pl.read_parquet(x),\n",
-    "    'polars[lazy]': lambda x: pl.scan_parquet(x),\n",
+    "    \"pandas\": lambda x: pd.read_parquet(x, engine=\"pyarrow\"),\n",
+    "    \"pandas[pyarrow]\": lambda x: pd.read_parquet(\n",
+    "        x, engine=\"pyarrow\", dtype_backend=\"pyarrow\"\n",
+    "    ),\n",
+    "    \"polars[eager]\": lambda x: pl.read_parquet(x),\n",
+    "    \"polars[lazy]\": lambda x: pl.scan_parquet(x),\n",
     "}"
 ]
 },
@@ -196,7 +198,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'pandas'\n",
+    "tool = \"pandas\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n",
     "results[tool] = timings.all_runs"
@@ -233,7 +235,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'pandas[pyarrow]'\n",
+    "tool = \"pandas[pyarrow]\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n",
     "results[tool] = timings.all_runs"
@@ -270,7 +272,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'polars[eager]'\n",
+    "tool = \"polars[eager]\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders))\n",
     "results[tool] = timings.all_runs"
@@ -307,7 +309,7 @@
 },
 "outputs": [],
 "source": [
-    "tool = 'polars[lazy]'\n",
+    "tool = \"polars[lazy]\"\n",
     "fn = IO_FUNCS[tool]\n",
     "timings = %timeit -o -q q10(fn(customer), fn(nation), fn(lineitem), fn(orders)).collect()\n",
     "results[tool] = timings.all_runs"
@@ -327,8 +329,9 @@
 "outputs": [],
 "source": [
     "import json\n",
-    "with open('results.json', 'w') as fd:\n",
-    "    json.dump(results, fd)\n"
+    "\n",
+    "with open(\"results.json\", \"w\") as fd:\n",
+    "    json.dump(results, fd)"
 ]
 }
 ],
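Both notebooks share the same benchmarking shape: `%timeit -o -q` returns an IPython TimeitResult whose `all_runs` list is stored per backend and dumped to results.json. Outside IPython, a rough stand-in (an assumption: `timeit.repeat` with a fixed repeat/number, whereas `%timeit` picks its loop count adaptively) would look like:

```python
import json
import timeit

results: dict[str, list[float]] = {}

# Time one backend; q1, IO_FUNCS and lineitem are assumed to be defined
# as in the notebook cells above.
results["pandas"] = timeit.repeat(
    lambda: q1(IO_FUNCS["pandas"](lineitem)), repeat=7, number=1
)

with open("results.json", "w") as fd:
    json.dump(results, fd)
```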