Add test for utils and remove deprecated #727

Merged: 11 commits, Oct 28, 2024

7 changes: 3 additions & 4 deletions lumen/ai/translate.py
@@ -9,9 +9,9 @@

import param

-from pydantic import BaseConfig, BaseModel, create_model
-from pydantic.color import Color
+from pydantic import BaseModel, ConfigDict, create_model
from pydantic.fields import FieldInfo, PydanticUndefined
+from pydantic_extra_types.color import Color

DATE_TYPE = datetime.datetime | datetime.date
PARAM_TYPE_MAPPING: dict[param.Parameter, type] = {
@@ -36,8 +36,7 @@ class ArbitraryTypesModel(BaseModel):
    A Pydantic model that allows arbitrary types.
    """

-    class Config(BaseConfig):
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)


def _create_literal(obj: list[str | type]) -> type:
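Note: this hunk applies the standard Pydantic v1 to v2 migration: the nested `class Config(BaseConfig)` becomes a `model_config = ConfigDict(...)` attribute, and `Color` now lives in the separately packaged `pydantic_extra_types`. A minimal sketch of what the updated config enables (the `CustomType` and `Wrapper` names are illustrative, not from this repo):

```python
from pydantic import BaseModel, ConfigDict


class ArbitraryTypesModel(BaseModel):
    """A Pydantic model that allows arbitrary types."""

    # Pydantic v2 style: configuration lives in a ConfigDict attribute
    # rather than a nested `class Config(BaseConfig)`.
    model_config = ConfigDict(arbitrary_types_allowed=True)


class CustomType:
    """A plain Python class with no Pydantic validator."""


class Wrapper(ArbitraryTypesModel):
    # Permitted only because arbitrary_types_allowed=True; with the default
    # config, Pydantic raises a schema-generation error for this annotation.
    value: CustomType
```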
2 changes: 1 addition & 1 deletion lumen/ai/utils.py
@@ -146,7 +146,7 @@ async def get_schema(
continue
elif not include_enum:
spec.pop("enum")
elif "limit" in get_kwargs:
elif "limit" in get_kwargs and len(spec["enum"]) > get_kwargs["limit"]:
spec["enum"].append("...")

if count and include_count:
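For context on the one-line change above: previously "..." was appended to a field's enum whenever a `limit` keyword was passed, even if every distinct value already fit within that limit; the added length check only marks enums that were actually truncated (see `test_enum_limit` below). A stand-alone sketch of that guard, using a hypothetical helper rather than the real `get_schema` internals:

```python
def mark_truncated_enum(spec: dict, limit: int | None = None) -> dict:
    # Append "..." only when the enum holds more distinct values than the
    # query limit, signalling that the listed values are a sample; an enum
    # that already fits is left untouched.
    if limit is not None and len(spec["enum"]) > limit:
        spec["enum"].append("...")
    return spec


assert mark_truncated_enum({"enum": ["a", "b"]}, limit=5) == {"enum": ["a", "b"]}
assert mark_truncated_enum({"enum": ["a", "b", "c"]}, limit=2) == {
    "enum": ["a", "b", "c", "..."]
}
```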
307 changes: 307 additions & 0 deletions lumen/tests/ai/test_utils.py
@@ -0,0 +1,307 @@
from pathlib import Path
from unittest.mock import MagicMock, patch

import jinja2
import numpy as np
import pandas as pd
import pytest

from panel.chat import ChatStep

try:
    from lumen.ai.utils import (
        UNRECOVERABLE_ERRORS, clean_sql, describe_data, format_schema,
        get_schema, render_template, report_error, retry_llm_output,
    )
except ImportError:
    pytest.skip("Skipping tests that require lumen.ai", allow_module_level=True)


def test_render_template_with_valid_template():
    template_content = "Hello {{ name }}!"
    with patch.object(Path, "read_text", return_value=template_content):
        result = render_template("test_template.txt", name="World")
        assert result == "Hello World!"


def test_render_template_missing_key():
    template_content = "Hello {{ name }}!"
    with patch.object(Path, "read_text", return_value=template_content):
        with pytest.raises(jinja2.exceptions.UndefinedError):
            render_template("test_template.txt")


class TestRetryLLMOutput:

    @patch("time.sleep", return_value=None)
    def test_success(self, mock_sleep):
        @retry_llm_output(retries=2)
        def mock_func(errors=None):
            return "Success"

        result = mock_func()
        assert result == "Success"
        assert mock_sleep.call_count == 0

    @patch("time.sleep", return_value=None)
    def test_failure(self, mock_sleep):
        @retry_llm_output(retries=2)
        def mock_func(errors=None):
            if errors is not None:
                assert errors == ["Failed"]
            raise Exception("Failed")

        with pytest.raises(Exception, match="Failed"):
            mock_func()
        assert mock_sleep.call_count == 1

    @patch("time.sleep", return_value=None)
    def test_failure_unrecoverable(self, mock_sleep):
        @retry_llm_output(retries=2)
        def mock_func(errors=None):
            if errors is not None:
                assert errors == ["Failed"]
            raise unrecoverable_error("Failed")

        unrecoverable_error = UNRECOVERABLE_ERRORS[0]
        with pytest.raises(unrecoverable_error, match="Failed"):
            mock_func(errors=["Failed"])
        assert mock_sleep.call_count == 0

    @patch("asyncio.sleep", return_value=None)
    async def test_async_success(self, mock_sleep):
        @retry_llm_output(retries=2)
        async def mock_func(errors=None):
            return "Success"

        result = await mock_func()
        assert result == "Success"
        assert mock_sleep.call_count == 0

    @patch("asyncio.sleep", return_value=None)
    async def test_async_failure(self, mock_sleep):
        @retry_llm_output(retries=2)
        async def mock_func(errors=None):
            if errors is not None:
                assert errors == ["Failed"]
            raise Exception("Failed")

        with pytest.raises(Exception, match="Failed"):
            await mock_func()
        assert mock_sleep.call_count == 1

    @patch("asyncio.sleep", return_value=None)
    async def test_async_failure_unrecoverable(self, mock_sleep):
        @retry_llm_output(retries=2)
        async def mock_func(errors=None):
            if errors is not None:
                assert errors == ["Failed"]
            raise unrecoverable_error("Failed")

        unrecoverable_error = UNRECOVERABLE_ERRORS[0]
        with pytest.raises(unrecoverable_error, match="Failed"):
            await mock_func(errors=["Failed"])
        assert mock_sleep.call_count == 0


def test_format_schema_with_enum():
    schema = {
        "field1": {"type": "string", "enum": ["a", "b", "c", "d", "e", "f"]},
        "field2": {"type": "integer"},
    }
    expected = {
        "field1": {"type": "str", "enum": ["a", "b", "c", "d", "e", "..."]},
        "field2": {"type": "int"},
    }
    assert format_schema(schema) == expected


def test_format_schema_no_enum():
    schema = {
        "field1": {"type": "boolean"},
        "field2": {"type": "integer"},
    }
    expected = {
        "field1": {"type": "bool"},
        "field2": {"type": "int"},
    }
    assert format_schema(schema) == expected


class TestGetSchema:

    async def test_get_schema_from_source(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {"field1": {"type": "integer"}}
        schema = await get_schema(mock_source)
        assert "field1" in schema
        assert schema["field1"]["type"] == "int"

    async def test_min_max(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {
                "type": "integer",
                "inclusiveMinimum": 0,
                "inclusiveMaximum": 100,
            }
        }
        schema = await get_schema(mock_source, include_min_max=True)
        assert "min" in schema["field1"]
        assert "max" in schema["field1"]
        assert schema["field1"]["min"] == 0
        assert schema["field1"]["max"] == 100

    async def test_no_min_max(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {
                "type": "integer",
                "inclusiveMinimum": 0,
                "inclusiveMaximum": 100,
            }
        }
        schema = await get_schema(mock_source, include_min_max=False)
        assert "min" not in schema["field1"]
        assert "max" not in schema["field1"]
        assert "inclusiveMinimum" not in schema["field1"]
        assert "inclusiveMaximum" not in schema["field1"]

    async def test_enum(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {"type": "string", "enum": ["value1", "value2"]}
        }
        schema = await get_schema(mock_source, include_enum=True)
        assert "enum" in schema["field1"]
        assert schema["field1"]["enum"] == ["value1", "value2"]

    async def test_no_enum(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {"type": "string", "enum": ["value1", "value2"]}
        }
        schema = await get_schema(mock_source, include_enum=False)
        assert "enum" not in schema["field1"]

    async def test_enum_limit(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {"type": "string", "enum": ["value1", "value2", "value3"]}
        }
        schema = await get_schema(mock_source, include_enum=True, limit=2)
        assert "enum" in schema["field1"]
        assert "..." in schema["field1"]["enum"]

    async def test_count(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {"type": "integer"},
            "count": 1000,
        }
        schema = await get_schema(mock_source, include_count=True)
        assert "count" in schema["field1"]
        assert schema["field1"]["count"] == 1000

    async def test_no_count(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {
            "field1": {"type": "integer"},
            "count": 1000,
        }
        schema = await get_schema(mock_source, include_count=False)
        assert "count" not in schema

    async def test_table(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {"field1": {"type": "integer"}}
        schema = await get_schema(mock_source, table="test_table")
        mock_source.get_schema.assert_called_with("test_table", limit=100)
        assert "field1" in schema

    async def test_custom_limit(self):
        mock_source = MagicMock()
        mock_source.get_schema.return_value = {"field1": {"type": "integer"}}
        schema = await get_schema(mock_source, table="test_table", limit=50)
        mock_source.get_schema.assert_called_with("test_table", limit=50)
        assert "field1" in schema

class TestDescribeData:

    async def test_describe_numeric_data(self):
        df = pd.DataFrame({
            "col1": np.arange(0, 100000),
            "col2": np.arange(0, 100000)
        })
        result = await describe_data(df)
        assert "col1" in result["stats"]
        assert "col2" in result["stats"]
        assert result["stats"]["col1"]["nulls"] == '0'
        assert result["stats"]["col2"]["nulls"] == '0'

    async def test_describe_with_nulls(self):
        df = pd.DataFrame({
            "col1": np.arange(0, 100000),
            "col2": np.arange(0, 100000)
        })
        df.loc[:5000, "col1"] = np.nan
        df.loc[:5000, "col2"] = np.nan
        result = await describe_data(df)
        assert result["stats"]["col1"]["nulls"] != '0'
        assert result["stats"]["col2"]["nulls"] != '0'

    async def test_describe_string_data(self):
        df = pd.DataFrame({
            "col1": ["apple", "banana", "cherry", "date", "elderberry"] * 2000,
            "col2": ["a", "b", "c", "d", "e"] * 2000
        })
        result = await describe_data(df)
        assert result["stats"]["col1"]["nunique"] == 5
        assert result["stats"]["col2"]["lengths"]["max"] == 1
        assert result["stats"]["col1"]["lengths"]["max"] == 10

    async def test_describe_datetime_data(self):
        df = pd.DataFrame({
            "col1": pd.date_range("2018-08-18", periods=10000),
            "col2": pd.date_range("2018-08-18", periods=10000),
        })
        result = await describe_data(df)
        assert "col1" in result["stats"]
        assert "col2" in result["stats"]

    async def test_describe_large_data(self):
        df = pd.DataFrame({
            "col1": range(6000),
            "col2": range(6000, 12000)
        })
        result = await describe_data(df)
        assert result["summary"]["is_summarized"] is True
        assert len(df.sample(5000)) == 5000  # Should summarize to 5000 rows

    async def test_describe_small_data(self):
        df = pd.DataFrame({
            "col1": [1, 2],
            "col2": [3, 4]
        })
        result = await describe_data(df)
        assert result.equals(df)


def test_clean_sql_removes_backticks():
    sql_expr = "```sql SELECT * FROM `table`; ```"
    cleaned_sql = clean_sql(sql_expr)
    assert cleaned_sql == 'SELECT * FROM "table"'


def test_clean_sql_strips_whitespace_and_semicolons():
    sql_expr = "SELECT * FROM table; "
    cleaned_sql = clean_sql(sql_expr)
    assert cleaned_sql == "SELECT * FROM table"


def test_report_error():
    step = ChatStep()
    report_error(Exception("Test error"), step)
    assert step.failed_title == "Test error"
    assert step.status == "failed"
    assert step.objects[0].object == "\n```python\nTest error\n```"
13 changes: 13 additions & 0 deletions pixi.toml
@@ -50,6 +50,7 @@ pytest-cov = "*"
pytest-github-actions-annotate-failures = "*"
pytest-rerunfailures = "*"
pytest-xdist = "*"
+pytest-asyncio = "*"

[feature.test-core.tasks]
test-unit = 'pytest lumen/tests -n logical --dist loadgroup'
@@ -64,6 +65,17 @@ toolz = "*"
intake-sql = "*"
python-duckdb = "*"
sqlalchemy = "*"
+# ai
+datashader = "*"
+duckdb = "*"
+instructor = ">=1.4.3"
+nbformat = "*"
+openai = "*"
+pyarrow = "*"
+pydantic = ">=2.8.0"
+
+[feature.test.pypi-dependencies]
+pydantic_extra_types = "*"

# [feature.ai.dependencies]
# datashader = "*"
@@ -74,6 +86,7 @@ sqlalchemy = "*"
# pyarrow = "*"
# pydantic = ">=2.8.0"
#

# [feature.ai-local.dependencies]
# huggingface_hub = "*"
#
6 changes: 4 additions & 2 deletions pyproject.toml
@@ -44,9 +44,9 @@ Source = "https://github.com/holoviz/lumen"
HoloViz = "https://holoviz.org/"

[project.optional-dependencies]
-tests = ['pytest', 'pytest-rerunfailures']
+tests = ['pytest', 'pytest-rerunfailures', 'pytest-asyncio']
sql = ['duckdb', 'intake-sql', 'sqlalchemy']
-ai = ['nbformat', 'duckdb', 'pyarrow', 'openai', 'instructor >=1.4.3', 'pydantic >=2.8.0', 'datashader']
+ai = ['nbformat', 'duckdb', 'pyarrow', 'openai', 'instructor >=1.4.3', 'pydantic >=2.8.0', 'datashader', 'pydantic-extra-types']
ai-local = ['lumen[ai]', 'huggingface_hub']
ai-llama = ['lumen[ai-local]', 'llama-cpp-python']

@@ -79,6 +79,8 @@ addopts = [
"--doctest-modules",
"--doctest-ignore-import-errors",
]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
minversion = "7"
xfail_strict = true
log_cli_level = "INFO"
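
The two `asyncio_*` settings pair with the `pytest-asyncio` dependency added in pixi.toml and the `tests` extra: `asyncio_mode = "auto"` lets pytest-asyncio collect plain `async def` tests (such as those in `lumen/tests/ai/test_utils.py` above) without an explicit `@pytest.mark.asyncio` marker, and `asyncio_default_fixture_loop_scope = "function"` defaults async fixtures to a per-test event loop. A minimal illustration, assuming pytest-asyncio is installed with this configuration (example file name is hypothetical):

```python
# test_async_example.py -- with asyncio_mode = "auto", pytest-asyncio collects
# this bare `async def` test; no @pytest.mark.asyncio decorator is required.
import asyncio


async def test_sleep_returns_none():
    result = await asyncio.sleep(0)
    assert result is None
```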