Skip to content

Commit

Permalink
SNOW-896744: Allow creating dataframes using tuples as schema (#1025)
Browse files Browse the repository at this point in the history
* SNOW-896744: Allow creating dataframes using tuples as schema

* use iterable instead of just tuple

* changelog updates
  • Loading branch information
sfc-gh-aalam authored Aug 29, 2023
1 parent fd1d347 commit 98e446a
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release History

## 1.8.0 (TBD)

### New Features

- Accept any `Iterable` of column names (for example, a tuple) as the `schema` argument when creating dataframes using `Session.create_dataframe`.

## 1.7.0 (2023-08-28)

### New Features
Expand Down
6 changes: 3 additions & 3 deletions src/snowflake/snowpark/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1865,7 +1865,7 @@ def write_pandas(
def create_dataframe(
self,
data: Union[List, Tuple, "pandas.DataFrame"],
schema: Optional[Union[StructType, List[str]]] = None,
schema: Optional[Union[StructType, Iterable[str]]] = None,
) -> DataFrame:
"""Creates a new DataFrame containing the specified values from the local data.
Expand Down Expand Up @@ -1961,8 +1961,8 @@ def create_dataframe(
else:
if not data:
raise ValueError("Cannot infer schema from empty data")
if isinstance(schema, list):
names = schema
if isinstance(schema, Iterable):
names = list(schema)
new_schema = reduce(
merge_type,
(infer_schema(row, names) for row in data),
Expand Down
7 changes: 7 additions & 0 deletions tests/integ/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2855,6 +2855,13 @@ def test_create_dataframe_special_char_column_name(session):
Utils.check_answer(df2, [Row(1, 2, 3), Row(1, 2, 3)])


def test_create_dataframe_with_tuple_schema(session):
    """Check that ``create_dataframe`` accepts a tuple of column names as ``schema``."""
    rows = [(20000101, 1, "x"), (20000101, 2, "y")]
    # A tuple (rather than a list) of names is the case under test.
    column_names = ("TIME", "ID", "V2")

    frame = session.create_dataframe(rows, schema=column_names)

    expected = [Row(20000101, 1, "x"), Row(20000101, 2, "y")]
    Utils.check_answer(frame, expected)


def test_df_join_suffix(session):
df1 = session.create_dataframe([[1, 1, "1"], [2, 2, "3"]]).to_df(["a", "b", "c"])
df2 = session.create_dataframe([[1, 1, "1"], [2, 3, "5"]]).to_df(["a", "b", "c"])
Expand Down

0 comments on commit 98e446a

Please sign in to comment.