-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
ENH: adding autofilter when writing to excel (pandas-dev#61194) #62994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
90f75a5
6bc6952
d1b05e8
1365c21
f69c62f
ecfd509
5160d24
0ba5cad
d87e245
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -532,6 +532,8 @@ class ExcelFormatter: | |
| Defaults to ``CSSToExcelConverter()``. | ||
| It should have signature css_declarations string -> excel style. | ||
| This is only called for body cells. | ||
| autofilter : bool, default False | ||
| If True, add automatic filters to all columns | ||
| """ | ||
|
|
||
| max_rows = 2**20 | ||
|
|
@@ -549,6 +551,7 @@ def __init__( | |
| merge_cells: ExcelWriterMergeCells = False, | ||
| inf_rep: str = "inf", | ||
| style_converter: Callable | None = None, | ||
| autofilter: bool = False, | ||
| ) -> None: | ||
| self.rowcounter = 0 | ||
| self.na_rep = na_rep | ||
|
|
@@ -584,6 +587,7 @@ def __init__( | |
| raise ValueError(f"Unexpected value for {merge_cells=}.") | ||
| self.merge_cells = merge_cells | ||
| self.inf_rep = inf_rep | ||
| self.autofilter = autofilter | ||
|
|
||
| def _format_value(self, val): | ||
| if is_scalar(val) and missing.isna(val): | ||
|
|
@@ -873,6 +877,34 @@ def get_formatted_cells(self) -> Iterable[ExcelCell]: | |
| cell.val = self._format_value(cell.val) | ||
| yield cell | ||
|
|
||
| def _num2excel(self, index: int) -> str: | ||
| """ | ||
| Convert 0-based column index to Excel column name. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| index : int | ||
| The numeric column index to convert to a Excel column name. | ||
|
|
||
| Returns | ||
| ------- | ||
| column_name : str | ||
| The column name corresponding to the index. | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError | ||
| Index is negative | ||
| """ | ||
| if index < 0: | ||
| raise ValueError(f"Index cannot be negative: {index}") | ||
| column_name = "" | ||
| # while loop in case column name needs to be longer than 1 character | ||
| while index > 0 or not column_name: | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| index, remainder = divmod(index, 26) | ||
| column_name = chr(65 + remainder) + column_name | ||
| return column_name | ||
|
|
||
| @doc(storage_options=_shared_docs["storage_options"]) | ||
| def write( | ||
| self, | ||
|
|
@@ -916,6 +948,31 @@ def write( | |
| f"Max sheet size is: {self.max_rows}, {self.max_cols}" | ||
| ) | ||
|
|
||
| if self.autofilter: | ||
| if num_cols == 0: | ||
| indexoffset = 0 | ||
| elif self.index: | ||
| if isinstance(self.df.index, MultiIndex): | ||
| indexoffset = self.df.index.nlevels - 1 | ||
| if self.merge_cells: | ||
| warnings.warn( | ||
| "Excel filters merged cells by showing only the first row." | ||
| "'autofiler' and 'merge_cells' should not " | ||
| "be used simultaneously.", | ||
|
Comment on lines
+959
to
+961
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because of blanks, the autofilter here will produce what I consider invalid results. As such, I think we should raise rather than warn. |
||
| UserWarning, | ||
| stacklevel=find_stack_level(), | ||
| ) | ||
| else: | ||
| indexoffset = 0 | ||
| else: | ||
| indexoffset = -1 | ||
| start = f"{self._num2excel(startcol)}{startrow + 1}" | ||
| autofilter_end_column = self._num2excel(startcol + num_cols + indexoffset) | ||
| end = f"{autofilter_end_column}{startrow + num_rows + 1}" | ||
| autofilter_range = f"{start}:{end}" | ||
| else: | ||
| autofilter_range = None | ||
|
|
||
| if engine_kwargs is None: | ||
| engine_kwargs = {} | ||
|
|
||
|
|
@@ -938,6 +995,7 @@ def write( | |
| startrow=startrow, | ||
| startcol=startcol, | ||
| freeze_panes=freeze_panes, | ||
| autofilter_range=autofilter_range, | ||
| ) | ||
| finally: | ||
| # make sure to close opened file handles | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -350,3 +350,89 @@ def test_format_hierarchical_rows_periodindex(merge_cells): | |
| assert isinstance(cell.val, Timestamp), ( | ||
| "Period should be converted to Timestamp" | ||
| ) | ||
|
|
||
|
|
||
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
@pytest.mark.parametrize("with_index", [True, False])
def test_autofilter(engine, with_index, tmp_excel):
    """Check the autofilter range written for a frame with/without its index."""
    # GH 61194
    df = DataFrame.from_dict([{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}])

    with ExcelWriter(tmp_excel, engine=engine) as writer:
        df.to_excel(writer, autofilter=True, index=with_index)

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        ws = wb.active

        assert ws.auto_filter.ref is not None
        # Compute the expected range first: writing
        # ``assert ref == "A1:D3" if with_index else "A1:C3"`` parses as
        # ``(ref == "A1:D3") if with_index else "A1:C3"``, which asserts a
        # non-empty (always truthy) string when ``with_index`` is False.
        expected = "A1:D3" if with_index else "A1:C3"
        assert ws.auto_filter.ref == expected
|
|
||
|
|
||
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
def test_autofilter_with_startrow_startcol(engine, tmp_excel):
    """Check that the autofilter range follows ``startrow``/``startcol``."""
    # GH 61194
    records = [{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}]
    frame = DataFrame.from_dict(records)
    with ExcelWriter(tmp_excel, engine=engine) as excel_writer:
        frame.to_excel(excel_writer, autofilter=True, startrow=10, startcol=10)

    # the written file is only ever read back through openpyxl
    openpyxl = pytest.importorskip("openpyxl")
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as workbook:
        sheet = workbook.active
        filter_ref = sheet.auto_filter.ref
        assert filter_ref is not None
        # Autofilter range is shifted by 10 rows and 10 columns
        assert filter_ref == "K11:N13"
|
|
||
|
|
||
def test_autofilter_not_supported_by_odf(tmp_path):
    """Check that requesting an autofilter with the odf engine raises."""
    # GH 61194
    # the odf engine only accepts paths with the 'ods' extension
    ods_file = tmp_path / f"{uuid.uuid4()}.ods"
    ods_file.touch()

    expected_msg = "Autofilter is not supported with odf!"
    with pytest.raises(ValueError, match=expected_msg):
        with ExcelWriter(str(ods_file), engine="odf") as odf_writer:
            DataFrame().to_excel(odf_writer, autofilter=True, index=False)
|
|
||
|
|
||
| @pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"]) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For each test, can you test all engines with something like:
    if engine in [...]:
        with pytest.raises(...):
            ...
        return
This makes the test suite more robust when adding a new engine. You might find it easier to move these over to |
||
def test_autofilter_with_multiindex(engine, tmp_excel):
    """Check the autofilter range for a MultiIndex frame with unmerged cells."""
    # GH 61194
    columns = {
        "animal": ("horse", "horse", "dog", "dog"),
        "color of fur": ("black", "white", "grey", "black"),
        "name": ("Blacky", "Wendy", "Rufus", "Catchy"),
    }
    # two columns become the hierarchical index, one data column remains
    hierarchical = DataFrame(columns).set_index(["animal", "color of fur"])
    with ExcelWriter(tmp_excel, engine=engine) as excel_writer:
        hierarchical.to_excel(
            excel_writer, autofilter=True, index=True, merge_cells=False
        )

    # the written file is only ever read back through openpyxl
    openpyxl = pytest.importorskip("openpyxl")
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as workbook:
        sheet = workbook.active

        filter_ref = sheet.auto_filter.ref
        assert filter_ref is not None
        assert filter_ref == "A1:C5"
|
|
||
|
|
||
def test_autofilter_with_multiindex_and_merge_cells_shows_warning(tmp_excel):
    """Check that autofilter combined with merged MultiIndex cells warns."""
    # GH 61194
    columns = {
        "animal": ("horse", "horse", "dog", "dog"),
        "color of fur": ("black", "white", "grey", "black"),
        "name": ("Blacky", "Wendy", "Rufus", "Catchy"),
    }
    # two columns become the hierarchical index, one data column remains
    hierarchical = DataFrame(columns).set_index(["animal", "color of fur"])
    expected_msg = (
        "Excel filters merged cells by showing only the first row."
        "'autofiler' and 'merge_cells' should not be used simultaneously."
    )
    with ExcelWriter(tmp_excel, engine="openpyxl") as excel_writer:
        with tm.assert_produces_warning(UserWarning, match=expected_msg):
            hierarchical.to_excel(
                excel_writer, autofilter=True, index=True, merge_cells=True
            )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems to me we should be raising an error here if a user specifies
`autofilter=True` and it is not supported with an engine, rather than silently ignoring the user's specification.