Skip to content

Commit

Permalink
Merge branch 'main' into ibisml-kaggle-competition-blog
Browse files Browse the repository at this point in the history
  • Loading branch information
jitingxu1 authored Aug 22, 2024
2 parents 96219ea + 910fa5c commit 16bfe86
Show file tree
Hide file tree
Showing 21 changed files with 151 additions and 144 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs-preview.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- name: Get all changed qmd files
id: changed-qmd-files
uses: tj-actions/changed-files@v44
uses: tj-actions/changed-files@v45
with:
# Avoid using single or double quotes for multiline patterns
files: |
Expand Down
2 changes: 0 additions & 2 deletions docs/_tabsets/install.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ from textwrap import dedent
backends = [
{"name": "BigQuery", "module": "bigquery"},
{"name": "ClickHouse", "module": "clickhouse"},
{"name": "Dask", "module": "dask"},
{"name": "DataFusion", "module": "datafusion"},
{"name": "Druid", "module": "druid"},
{"name": "DuckDB", "module": "duckdb"},
Expand All @@ -20,7 +19,6 @@ backends = [
{"name": "MSSQL", "module": "mssql"},
{"name": "MySQL", "module": "mysql"},
{"name": "Oracle", "module": "oracle"},
{"name": "pandas", "module": "pandas"},
{"name": "Polars", "module": "polars"},
{"name": "PostgreSQL", "module": "postgres"},
{"name": "PySpark", "module": "pyspark"},
Expand Down
7 changes: 7 additions & 0 deletions docs/backends/dask.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

![](https://img.shields.io/badge/memtables-native-green?style=flat-square) ![](https://img.shields.io/badge/inputs-CSV | Parquet-blue?style=flat-square) ![](https://img.shields.io/badge/outputs-CSV | pandas | Parquet | PyArrow-orange?style=flat-square)

::: {.callout-warning}
## The Dask backend is slated for removal in Ibis 10.0
We recommend using one of our other backends.

Many workloads work well on the DuckDB and Polars backends, for example.
:::

## Install

Install Ibis and dependencies for the Dask backend:
Expand Down
8 changes: 8 additions & 0 deletions docs/backends/pandas.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@

![](https://img.shields.io/badge/memtables-native-green?style=flat-square) ![](https://img.shields.io/badge/inputs-CSV | Parquet-blue?style=flat-square) ![](https://img.shields.io/badge/outputs-CSV | pandas | Parquet | PyArrow-orange?style=flat-square)

::: {.callout-warning}
## The pandas backend is slated for removal in Ibis 10.0
We recommend using one of our other backends.

Many workloads work well on the DuckDB and Polars backends, for example.
:::


## Install

Install Ibis and dependencies for the pandas backend:
Expand Down
32 changes: 31 additions & 1 deletion docs/backends/snowflake.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,41 @@ con = ibis.snowflake.connect(

```python
con = ibis.connect(
f"snowflake://{user}@{account}/{database}?warehouse={warehouse}",
f"snowflake://{user}@{account}/{database}/{schema}?warehouse={warehouse}",
authenticator="externalbrowser",
)
```

### Authenticating with Key Pair Authentication

Ibis supports connecting to Snowflake warehouses using private keys.

You can use key pair authentication with either the explicit-parameter
connection API or the URL-style connection API.

#### Explicit

```python
con = ibis.snowflake.connect(
user="user",
account="safpqpq-sq55555",
database="my_database",
schema="my_schema",
warehouse="my_warehouse",
# extracted private key from .p8 file
private_key=os.getenv("SNOWFLAKE_PKEY"),
)
```

#### URL

```python
con = ibis.connect(
f"snowflake://{user}@{account}/{database}/{schema}?warehouse={warehouse}",
private_key=os.getenv("SNOWFLAKE_PKEY"),
)
```

### Looking up your Snowflake organization ID and user ID

A [Snowflake account
Expand Down
6 changes: 2 additions & 4 deletions ibis/backends/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,14 @@ def do_connect(
>>> ibis.dask.connect(data)
"""
if dictionary is None:
dictionary = {}
super().do_connect(dictionary)

for k, v in dictionary.items():
for k, v in self.dictionary.items():
if not isinstance(v, (dd.DataFrame, pd.DataFrame)):
raise TypeError(
f"Expected an instance of 'dask.dataframe.DataFrame' for {k!r},"
f" got an instance of '{type(v).__name__}' instead."
)
super().do_connect(dictionary)

def disconnect(self) -> None:
pass
Expand Down
12 changes: 11 additions & 1 deletion ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
except ImportError:
SessionConfig = None

try:
from datafusion import RuntimeConfig
except ImportError:
RuntimeConfig = None

if TYPE_CHECKING:
import pandas as pd
import polars as pl
Expand Down Expand Up @@ -105,7 +110,12 @@ def do_connect(
).with_information_schema(True)
else:
df_config = None
self.con = SessionContext(df_config)
if RuntimeConfig is None:
self.con = SessionContext(df_config)
else:
# datafusion 40.1.0 has a bug where SessionContext requires
# both SessionConfig and RuntimeConfig be provided.
self.con = SessionContext(df_config, RuntimeConfig())

self._register_builtin_udfs()

Expand Down
5 changes: 0 additions & 5 deletions ibis/backends/datafusion/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import pandas.testing as tm
import pytest
from packaging.version import parse as vparse

import ibis.expr.datatypes as dt
import ibis.expr.types as ir
Expand Down Expand Up @@ -69,10 +68,6 @@ def median(a: float) -> float:
assert result == con.tables.batting.G.execute().median()


@pytest.mark.xfail(
condition=vparse(datafusion.__version__) >= vparse("38.0.1"),
reason="internal error about MEDIAN(G) naming",
)
def test_builtin_agg_udf_filtered(con):
@udf.agg.builtin
def median(a: float, where: bool = True) -> float:
Expand Down
40 changes: 38 additions & 2 deletions ibis/backends/mssql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from contextlib import closing
from operator import itemgetter
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote_plus

import pyodbc
import sqlglot as sg
Expand All @@ -21,12 +22,13 @@
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends import CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl
from ibis.backends import CanCreateCatalog, CanCreateDatabase, CanCreateSchema
from ibis.backends.sql import SQLBackend
from ibis.backends.sql.compilers.base import STAR, C

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping
from urllib.parse import ParseResult

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -73,7 +75,7 @@ def datetimeoffset_to_datetime(value):
# Databases: sys.schemas


class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema, NoUrl):
class Backend(SQLBackend, CanCreateCatalog, CanCreateDatabase, CanCreateSchema):
name = "mssql"
compiler = sc.mssql.compiler
supports_create_or_replace = False
Expand Down Expand Up @@ -169,6 +171,40 @@ def _post_connect(self):
with closing(self.con.cursor()) as cur:
cur.execute("SET DATEFIRST 1")

def _from_url(self, url: ParseResult, **kwargs):
    """Connect to MSSQL using the components of a parsed URL.

    The user, password, host, port and database are read from ``url`` and
    override any same-named entries in ``kwargs``. Only the first segment
    of the URL path is used as the database name.

    Parameters
    ----------
    url
        The parsed connection URL.
    kwargs
        Additional keyword arguments forwarded to ``connect``.

    Returns
    -------
    The result of ``self.connect`` called with the assembled arguments.
    """
    database, *_ = url.path[1:].split("/", 1)

    # Keep a missing password as None so it can be dropped below; only
    # decode a password that is actually present in the URL. Coercing a
    # missing password to "" would forward an empty password to `connect`.
    raw_password = url.password
    password = None if raw_password is None else unquote_plus(raw_password)

    kwargs.update(
        {
            "user": url.username,
            "password": password,
            "host": url.hostname,
            "database": database or "",
            "port": url.port or None,
        }
    )

    # NOTE(review): `_convert_kwargs` is defined outside this block;
    # presumably it normalizes query-string values in place -- confirm.
    self._convert_kwargs(kwargs)

    # For these keys any falsy value (None or "") means "not provided".
    for key in ("host", "user", "database", "driver"):
        if key in kwargs and not kwargs[key]:
            del kwargs[key]

    # For these keys only None means "not provided"; an explicitly empty
    # password (``user:@host``) is still forwarded.
    for key in ("password", "port"):
        if key in kwargs and kwargs[key] is None:
            del kwargs[key]

    return self.connect(**kwargs)

def get_schema(
self, name: str, *, catalog: str | None = None, database: str | None = None
) -> sch.Schema:
Expand Down
24 changes: 24 additions & 0 deletions ibis/backends/mssql/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from urllib.parse import urlencode

import pytest
import sqlglot as sg
import sqlglot.expressions as sge
Expand All @@ -8,6 +10,14 @@
import ibis
import ibis.expr.datatypes as dt
from ibis import udf
from ibis.backends.mssql.tests.conftest import (
IBIS_TEST_MSSQL_DB,
MSSQL_HOST,
MSSQL_PASS,
MSSQL_PORT,
MSSQL_PYODBC_DRIVER,
MSSQL_USER,
)

RAW_DB_TYPES = [
# Exact numbers
Expand Down Expand Up @@ -204,3 +214,17 @@ def test_create_temp_table_from_obj(con):
assert persisted_from_temp.to_pyarrow().equals(t2.to_pyarrow())

con.drop_table("fuhreal")


def test_from_url():
    """Connect to the test MSSQL server through a URL and run a trivial query."""
    # The ODBC driver name travels in the query string, URL-encoded.
    query_string = urlencode({"driver": MSSQL_PYODBC_DRIVER})
    credentials = f"{MSSQL_USER}:{MSSQL_PASS}"
    location = f"{MSSQL_HOST}:{MSSQL_PORT}/{IBIS_TEST_MSSQL_DB}"
    new_con = ibis.connect(f"mssql://{credentials}@{location}?{query_string}")
    frame = new_con.sql("SELECT 1 AS [a]").to_pandas()
    assert frame.a.iat[0] == 1
2 changes: 1 addition & 1 deletion ibis/backends/oracle/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_list_tables_schema_warning_refactor(con):
assert con.list_tables(database="SYS", like="EXU8OPT") == ["EXU8OPT"]


def test_from_url(con):
def test_from_url():
new_con = ibis.connect("oracle://ibis:ibis@localhost:1521/IBIS_TESTING")

assert new_con.list_tables()
Expand Down
5 changes: 5 additions & 0 deletions ibis/backends/pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from functools import lru_cache
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -51,6 +52,10 @@ def do_connect(
<ibis.backends.pandas.Backend at 0x...>
"""
warnings.warn(
f"The {self.name} backend is slated for removal in 10.0.",
DeprecationWarning,
)
self.dictionary = dictionary or {}
self.schemas: MutableMapping[str, sch.Schema] = {}

Expand Down
Loading

0 comments on commit 16bfe86

Please sign in to comment.