Skip to content

Commit

Permalink
feat: dask in api-completeness table (#741)
Browse files Browse the repository at this point in the history
* feat: dask in api-completeness table

* generalize a bit

* type return

* fix strict build
  • Loading branch information
FBruzzesi authored Aug 22, 2024
1 parent 2f7dfc6 commit 949d5b6
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 76 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ todo.md
site/
.coverage.*
.nox
docs/api-completeness.md

docs/api-completeness/*.md
!docs/api-completeness/index.md
14 changes: 14 additions & 0 deletions docs/api-completeness/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# API Completeness

Narwhals has two different levels of support for libraries: "full" and "interchange".

For libraries with full support, we intend to support the whole Narwhals API;
however, this is a continuous work in progress.

In the following sections you can check which methods are implemented for each
class and backend.

!!! info

    - By design, Polars supports all the methods of the Narwhals API.
    - "pandas-like" means pandas, cuDF and Modin.
2 changes: 1 addition & 1 deletion docs/extending.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Alternatively, if you can't do that (for example, if your library is closed-sourc
the next section for what else you can do.

To check which methods are supported for which backend in depth, please refer to the
[API completeness page](api-completeness.md).
[API completeness page](api-completeness/index.md).

## Extending Narwhals

Expand Down
6 changes: 5 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ nav:
- how_it_works.md
- Roadmap: roadmap.md
- Related projects: related.md
- API Completeness: api-completeness.md
- API Completeness:
- api-completeness/index.md
- api-completeness/dataframe.md
- api-completeness/expr.md
- api-completeness/series.md
- API Reference:
- api-reference/narwhals.md
- api-reference/dataframe.md
Expand Down
13 changes: 1 addition & 12 deletions utils/api-completeness.md.jinja
Original file line number Diff line number Diff line change
@@ -1,14 +1,3 @@
# API Completeness

Narwhals has two different levels of support for libraries: "full" and "interchange".

For libraries with full support, we intend to support the whole Narwhals API; however, this is a work in progress.

In the following table it is possible to check which method is implemented for which backend.

!!! info

- "pandas-like" means pandas, cuDF and Modin
- Polars supports all the methods (by design)
# {{ title }}

{{ backend_table }}
143 changes: 82 additions & 61 deletions utils/generate_backend_completeness.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,67 @@

import importlib
import inspect
from enum import Enum
from enum import auto
from pathlib import Path
from typing import Any
from typing import Final
from typing import NamedTuple

import polars as pl
from jinja2 import Template

TEMPLATE_PATH: Final[Path] = Path("utils") / "api-completeness.md.jinja"
DESTINATION_PATH: Final[Path] = Path("docs") / "api-completeness.md"
DESTINATION_PATH: Final[Path] = Path("docs") / "api-completeness"


class BackendType(Enum):
    """Tag stored in ``Backend.type_`` for each entry of ``BACKENDS``.

    Members receive the consecutive integer values generated by ``auto()``
    in declaration order.
    """

    # NOTE(review): the meaning of each member is inferred from its name and
    # from how BACKENDS tags the backends -- confirm before relying on it.
    LAZY = auto()  # used for the dask entry in BACKENDS
    EAGER = auto()  # used for the pandas-like and arrow entries in BACKENDS
    BOTH = auto()  # not assigned to any entry in BACKENDS


class Backend(NamedTuple):
    """Record describing one backend compared in the completeness tables."""

    # Label written to the "Backend" column of the table (e.g. "pandas-like").
    name: str
    # narwhals sub-package name; modules are imported as
    # ``narwhals.<module>.<module_name>``.
    module: str
    # BackendType tag for the backend.
    type_: BackendType


# narwhals submodules to inspect; one Markdown page is written per entry.
MODULES = ["dataframe", "series", "expr"]

# Backends compared against the narwhals API. ``module`` is the sub-package
# that gets imported as ``narwhals.<module>.<module_name>``.
BACKENDS = [
    Backend(name="pandas-like", module="_pandas_like", type_=BackendType.EAGER),
    Backend(name="arrow", module="_arrow", type_=BackendType.EAGER),
    Backend(name="dask", module="_dask", type_=BackendType.LAZY),
]

# narwhals class names that are skipped when building the tables.
EXCLUDE_CLASSES = {"BaseFrame"}


def get_class_methods(kls: type[Any]) -> list[str]:
    """Return the names of the public members of ``kls``.

    Every member reported by ``inspect.getmembers`` is considered (no
    predicate is applied, so non-callable attributes are included too);
    names starting with an underscore are dropped.

    Args:
        kls: Class to inspect.

    Returns:
        Public member names, in the order ``inspect.getmembers`` yields them.
    """
    public_names: list[str] = []
    for member_name, _member in inspect.getmembers(kls):
        if member_name.startswith("_"):
            continue
        public_names.append(member_name)
    return public_names


def get_backend_completeness_table() -> str:
results = []
def parse_module(module_name: str, backend: str, nw_class_name: str) -> list[str]:
    """List the public members of a backend's counterpart to a narwhals class.

    Imports ``narwhals.<backend>.<module_name>`` and looks for classes whose
    name ends with ``nw_class_name``; the first match returned by
    ``inspect.getmembers`` is inspected with ``get_class_methods``.

    Args:
        module_name: Submodule to import inside the backend package.
        backend: Backend sub-package name (e.g. ``"_arrow"``).
        nw_class_name: narwhals class name used as the suffix to match.

    Returns:
        Public member names of the matched class, or an empty list when no
        class matches or a ``ModuleNotFoundError`` is raised.
    """

    def _is_backend_class(obj: Any) -> bool:
        return inspect.isclass(obj) and obj.__name__.endswith(nw_class_name)

    qualified_name = f"narwhals.{backend}.{module_name}"
    # The whole lookup stays inside the ``try`` so that a ModuleNotFoundError
    # raised at any step yields an empty result.
    try:
        backend_module = importlib.import_module(qualified_name)
        matches = inspect.getmembers(backend_module, predicate=_is_backend_class)
        if not matches:
            return []
        _, matched_class = matches[0]
        return get_class_methods(matched_class)
    except ModuleNotFoundError:
        return []


def get_backend_completeness_table() -> None:
for module_name in MODULES:
results = []

nw_namespace = f"narwhals.{module_name}"
sub_module_name = module_name

narwhals_module_ = importlib.import_module(nw_namespace)
classes_ = inspect.getmembers(
Expand All @@ -37,71 +73,56 @@ def get_backend_completeness_table() -> str:
for nw_class_name, nw_class in classes_:
if nw_class_name in EXCLUDE_CLASSES:
continue
if nw_class_name == "LazyFrame":
backend_class_name = "DataFrame"
else:
backend_class_name = nw_class_name

arrow_class_name = f"Arrow{backend_class_name}"
arrow_module_ = importlib.import_module(f"narwhals._arrow.{sub_module_name}")
arrow_class = inspect.getmembers(
arrow_module_,
predicate=lambda c: inspect.isclass(c) and c.__name__ == arrow_class_name, # noqa: B023
)

pandas_class_name = f"PandasLike{backend_class_name}"
pandas_module_ = importlib.import_module(
f"narwhals._pandas_like.{sub_module_name}"
)
pandas_class = inspect.getmembers(
pandas_module_,
predicate=lambda c: inspect.isclass(c)
and c.__name__ == pandas_class_name, # noqa: B023
)

nw_methods = get_class_methods(nw_class)
arrow_methods = get_class_methods(arrow_class[0][1]) if arrow_class else []
pandas_methods = get_class_methods(pandas_class[0][1]) if pandas_class else []

narhwals = pl.DataFrame(
{"Class": nw_class_name, "Backend": "narwhals", "Method": nw_methods}
)
arrow = pl.DataFrame(
{"Class": nw_class_name, "Backend": "arrow", "Method": arrow_methods}
)
pandas = pl.DataFrame(
{
"Class": nw_class_name,
"Backend": "pandas-like",
"Method": pandas_methods,
}
)

results.extend([narhwals, pandas, arrow])

results = (
pl.concat(results) # noqa: PD010
.with_columns(supported=pl.lit(":white_check_mark:"))
.pivot(on="Backend", values="supported", index=["Class", "Method"])
.filter(pl.col("narwhals").is_not_null())
.drop("narwhals")
.fill_null(":x:")
.sort("Class", "Method")
)

with pl.Config(
tbl_formatting="ASCII_MARKDOWN",
tbl_hide_column_data_types=True,
tbl_hide_dataframe_shape=True,
set_tbl_rows=results.shape[0],
):
return str(results)
backend_methods = [
pl.DataFrame(
{
"Class": nw_class_name,
"Backend": backend.name,
"Method": parse_module(
module_name,
backend=backend.module,
nw_class_name=nw_class_name,
),
# "Type": backend.type_
}
)
for backend in BACKENDS
]

results.extend([narhwals, *backend_methods])

results = (
pl.concat(results) # noqa: PD010
.with_columns(supported=pl.lit(":white_check_mark:"))
.pivot(on="Backend", values="supported", index=["Class", "Method"])
.filter(pl.col("narwhals").is_not_null())
.drop("narwhals")
.fill_null(":x:")
.sort("Class", "Method")
)

with pl.Config(
tbl_formatting="ASCII_MARKDOWN",
tbl_hide_column_data_types=True,
tbl_hide_dataframe_shape=True,
set_tbl_rows=results.shape[0],
):
table = str(results)

with TEMPLATE_PATH.open(mode="r") as stream:
new_content = Template(stream.read()).render(
{"backend_table": table, "title": module_name.capitalize()}
)

backend_table = get_backend_completeness_table()
with (DESTINATION_PATH / f"{module_name}.md").open(mode="w") as destination:
destination.write(new_content)

with TEMPLATE_PATH.open(mode="r") as stream:
new_content = Template(stream.read()).render({"backend_table": backend_table})

with DESTINATION_PATH.open(mode="w") as destination:
destination.write(new_content)
_ = get_backend_completeness_table()

0 comments on commit 949d5b6

Please sign in to comment.