From 374093af34112be245bf09e04cc55042037d5d82 Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Mon, 13 May 2024 16:27:37 -0700 Subject: [PATCH] Import tabulate in each task (#1679) * Import tabulate in each task Signed-off-by: Eduardo Apolinario * Fix lint errors Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- .../data_types_and_io/structured_dataset.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/examples/data_types_and_io/data_types_and_io/structured_dataset.py b/examples/data_types_and_io/data_types_and_io/structured_dataset.py index df0c04a99..12b6ba659 100644 --- a/examples/data_types_and_io/data_types_and_io/structured_dataset.py +++ b/examples/data_types_and_io/data_types_and_io/structured_dataset.py @@ -16,7 +16,6 @@ StructuredDatasetEncoder, StructuredDatasetTransformerEngine, ) -from tabulate import tabulate from typing_extensions import Annotated @@ -203,6 +202,8 @@ class CompanyField: @task(container_image=image) def create_parquet_file() -> StructuredDataset: + from tabulate import tabulate + df = pd.json_normalize(data, max_level=0) print("original dataframe: \n", tabulate(df, headers="keys", tablefmt="psql")) @@ -211,6 +212,8 @@ def create_parquet_file() -> StructuredDataset: @task(container_image=image) def print_table_by_arg(sd: MyArgDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MyArgDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t @@ -218,6 +221,8 @@ def print_table_by_arg(sd: MyArgDataset) -> pd.DataFrame: @task(container_image=image) def print_table_by_dict(sd: MyDictDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MyDictDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t @@ -225,6 +230,8 @@ def print_table_by_dict(sd: MyDictDataset) -> pd.DataFrame: @task(container_image=image) def print_table_by_list_dict(sd: MyDictListDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MyDictListDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t @@ -232,6 +239,8 @@ def print_table_by_list_dict(sd: MyDictListDataset) -> pd.DataFrame: @task(container_image=image) def print_table_by_top_dataclass(sd: MyTopDataClassDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MyTopDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t @@ -239,6 +248,8 @@ def print_table_by_top_dataclass(sd: MyTopDataClassDataset) -> pd.DataFrame: @task(container_image=image) def print_table_by_top_dict(sd: MyTopDictDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MyTopDictDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t @@ -246,6 +257,8 @@ def print_table_by_top_dict(sd: MyTopDictDataset) -> pd.DataFrame: @task(container_image=image) def print_table_by_second_dataclass(sd: MySecondDataClassDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MySecondDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t @@ -253,6 +266,8 @@ def print_table_by_second_dataclass(sd: MySecondDataClassDataset) -> pd.DataFram @task(container_image=image) def print_table_by_nested_dataclass(sd: MyNestedDataClassDataset) -> pd.DataFrame: + from tabulate import tabulate + t = sd.open(pd.DataFrame).all() print("MyNestedDataClassDataset dataframe: \n", tabulate(t, headers="keys", tablefmt="psql")) return t