Skip to content

Commit

Permalink
SNOW-1371757: Fix repr when max_cols or max_rows is None (#1545)
Browse files Browse the repository at this point in the history
1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue to your PR.

   Fixes SNOW-1371757

2. Fill out the following pre-review checklist:

- [x] I am adding a new automated test(s) to verify correctness of my
new code
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency

3. Please describe how your code solves the related issue.

Catch `None` values at the API layer and sanitize in the same way Modin
OSS does:
   

https://github.com/modin-project/modin/blob/06699a892c2a020c92ecf0c54d1aa35bbcd5660b/modin/pandas/dataframe.py#L268-L269

---------

Signed-off-by: Devin Petersohn <devin.petersohn@snowflake.com>
  • Loading branch information
sfc-gh-dpetersohn authored May 13, 2024
1 parent 935f011 commit e158e41
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/snowflake/snowpark/modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ def __repr__(self):
str
"""
# TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
num_rows = pandas.get_option("display.max_rows") or 10
num_rows = pandas.get_option("display.max_rows") or len(self)
# see _repr_html_ for comment, allow here also all column behavior
num_cols = pandas.get_option("display.max_columns")
num_cols = pandas.get_option("display.max_columns") or len(self.columns)

(
row_count,
Expand Down
1 change: 1 addition & 0 deletions src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Bug Fixes
- Fixed bug when creating a DataFrame with a dict of Series objects.
- Fixed support for `display.max_rows` and `display.max_cols` being set to `None`.
- Fixed bug when performing multiple DataFrameGroupBy apply/transform operations on the same DataFrame.

### Improvements
Expand Down
36 changes: 29 additions & 7 deletions tests/integ/modin/frame/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def setup_class(self):
self.num_rows = pd.get_option("display.max_rows")
self.num_cols = pd.get_option("display.max_columns")

self.native_df = native_pd.DataFrame(
data=np.zeros(shape=(40, 40)), columns=[f"x{i}" for i in range(40)]
)
self.snow_df = pd.DataFrame(self.native_df)

def teardown_class(self):
native_pd.set_option("display.max_rows", self.native_num_rows)
native_pd.set_option("display.max_columns", self.native_num_cols)
Expand All @@ -162,19 +167,36 @@ def test_with_max_columns(self):
native_pd.set_option("display.max_columns", 10)
pd.set_option("display.max_columns", 10)

native_df = native_pd.DataFrame(
data=np.zeros(shape=(40, 40)), columns=[f"x{i}" for i in range(40)]
)
snow_df = pd.DataFrame(native_df)

# This test should only issue 6 SQL queries given dataframe creation from large
# local data: 1) Creating temp table, 2) Setting query tag, 3) Inserting into temp table,
# 4) Unsetting query tag, 5) Select Columns, 6) Drop temp table.
# However, an additional 6 queries are issued to eagerly get the row count.
# for now, track only SELECT count here
with SqlCounter(select_count=1):
snow_str = repr(snow_df)
native_str = repr(native_df)
snow_str = repr(self.snow_df)
native_str = repr(self.native_df)

assert snow_str == native_str
self.teardown_class()

def test_with_max_rows_none(self):
native_pd.set_option("display.max_rows", None)
pd.set_option("display.max_rows", None)

with SqlCounter(select_count=2):
snow_str = repr(self.snow_df)
native_str = repr(self.native_df)

assert snow_str == native_str
self.teardown_class()

def test_with_max_columns_none(self):
native_pd.set_option("display.max_columns", None)
pd.set_option("display.max_columns", None)

with SqlCounter(select_count=1):
snow_str = repr(self.snow_df)
native_str = repr(self.native_df)

assert snow_str == native_str

Expand Down

0 comments on commit e158e41

Please sign in to comment.