Skip to content

Commit

Permalink
move dataset out of experimental (#384)
Browse files Browse the repository at this point in the history
* move dataset out of experimental

* change split type for experimental tests

* change split type for experimental tests

* new cache key

* only run experimental once

* moving test out

* test fix

* remove unused function

* added dataset pytest group

* fix test

* standalone dataset test
  • Loading branch information
grant-Kolena authored Jan 3, 2024
1 parent 458df79 commit f861c84
Show file tree
Hide file tree
Showing 15 changed files with 69 additions and 77 deletions.
2 changes: 2 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ jobs:
tests/integration/fr/.* fr true
kolena/workflow/.* workflow true
tests/integration/workflow/.* workflow true
kolena/dataset/.* dataset true
tests/integration/dataset/.* dataset true
kolena/_(api|utils)/.* all true
kolena/[^/]*.py all true
tests/integration/[^/]*.py all true
Expand Down
25 changes: 21 additions & 4 deletions .circleci/continue_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ parameters:
workflow:
type: boolean
default: false
dataset:
type: boolean
default: false
misc:
type: boolean
default: false
Expand Down Expand Up @@ -219,9 +222,13 @@ jobs:
export KOLENA_TOKEN=${!token}
- when:
condition:
equal:
- "none"
- << parameters.extras >>
and:
- equal:
- "none"
- << parameters.extras >>
- not_equal:
- _experimental
- << parameters.pytest-group >>
steps:
- run:
name: Run << parameters.pytest-group >> integration tests
Expand Down Expand Up @@ -341,11 +348,21 @@ workflows:
matrix:
parameters:
python-version: [ "3.9.18" ]
extras: [ "none", "metrics" ]
extras: [ "metrics" ]
pytest-group: _experimental
enabled: true
requires:
- ci-base-<< matrix.python-version >>-<< matrix.extras >>
- integration-test:
name: integration-test-dataset-<< matrix.python-version >>-<< matrix.extras >>
matrix:
parameters:
python-version: [ "3.9.18" ]
extras: [ "none" ]
pytest-group: dataset
enabled: true
requires:
- ci-base-<< matrix.python-version >>-<< matrix.extras >>
- integration-test:
name: integration-test-misc-<< matrix.python-version >>-<< matrix.extras >>
matrix:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from question_answering.constants import TRUTHFULQA

import kolena
from kolena._experimental.dataset import register_dataset
from kolena.dataset import register_dataset
from kolena.workflow.io import dataframe_from_csv

DATASETS = {
Expand Down
27 changes: 0 additions & 27 deletions kolena/_experimental/dataset/__init__.py

This file was deleted.

14 changes: 11 additions & 3 deletions kolena/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# noreorder
from kolena.workflow import annotation
from kolena.workflow import asset
from kolena.dataset.dataset import fetch_dataset_history
from kolena.dataset.dataset import fetch_dataset
from kolena.dataset.dataset import register_dataset
from kolena.dataset.evaluation import fetch_results
from kolena.dataset.evaluation import test
from kolena.workflow import annotation, asset

# Public API of this package: the dataset helpers imported above from
# `kolena.dataset.dataset` and `kolena.dataset.evaluation`, plus the
# `annotation` and `asset` modules re-exported from `kolena.workflow`.
__all__ = [
"register_dataset",
"fetch_dataset_history",
"fetch_dataset",
"fetch_results",
"test",
"annotation",
"asset",
]
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@
from kolena._api.v2.dataset import LoadDatasetByNameRequest
from kolena._api.v2.dataset import Path
from kolena._api.v2.dataset import RegisterRequest
from kolena._experimental.dataset.common import COL_DATAPOINT
from kolena._experimental.dataset.common import COL_DATAPOINT_ID_OBJECT
from kolena._experimental.dataset.common import validate_batch_size
from kolena._experimental.dataset.common import validate_dataframe_ids
from kolena._utils import krequests_v2 as krequests
from kolena._utils import log
from kolena._utils.batched_load import _BatchedLoader
Expand All @@ -46,6 +42,10 @@
from kolena._utils.endpoints import get_dataset_url
from kolena._utils.serde import from_dict
from kolena._utils.state import API_V2
from kolena.dataset.common import COL_DATAPOINT
from kolena.dataset.common import COL_DATAPOINT_ID_OBJECT
from kolena.dataset.common import validate_batch_size
from kolena.dataset.common import validate_dataframe_ids
from kolena.errors import InputValidationError
from kolena.workflow._datatypes import _deserialize_dataobject
from kolena.workflow._datatypes import _serialize_dataobject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,29 @@
from kolena._api.v2.model import LoadResultsRequest
from kolena._api.v2.model import Path
from kolena._api.v2.model import UploadResultsRequest
from kolena._experimental.dataset._dataset import _iter_dataset_raw
from kolena._experimental.dataset._dataset import _to_deserialized_dataframe
from kolena._experimental.dataset._dataset import _to_serialized_dataframe
from kolena._experimental.dataset._dataset import load_dataset
from kolena._experimental.dataset.common import COL_DATAPOINT
from kolena._experimental.dataset.common import COL_DATAPOINT_ID_OBJECT
from kolena._experimental.dataset.common import COL_EVAL_CONFIG
from kolena._experimental.dataset.common import COL_RESULT
from kolena._experimental.dataset.common import validate_batch_size
from kolena._experimental.dataset.common import validate_dataframe_ids
from kolena._utils import krequests_v2 as krequests
from kolena._utils import log
from kolena._utils.batched_load import _BatchedLoader
from kolena._utils.batched_load import init_upload
from kolena._utils.batched_load import upload_data_frame
from kolena._utils.consts import BatchSize
from kolena._utils.state import API_V2
from kolena.dataset.common import COL_DATAPOINT
from kolena.dataset.common import COL_DATAPOINT_ID_OBJECT
from kolena.dataset.common import COL_EVAL_CONFIG
from kolena.dataset.common import COL_RESULT
from kolena.dataset.common import validate_batch_size
from kolena.dataset.common import validate_dataframe_ids
from kolena.dataset.dataset import _to_deserialized_dataframe
from kolena.dataset.dataset import _to_serialized_dataframe
from kolena.dataset.dataset import load_dataset
from kolena.errors import IncorrectUsageError
from kolena.errors import NotFoundError

# Alias for an evaluation configuration value: either None or a dict with
# string keys and arbitrary values.
TYPE_EVALUATION_CONFIG = Optional[Dict[str, Any]]
# Alias for a value that is None, a single string, or a list of strings.
# NOTE(review): presumably names the field(s) results are tested on — confirm
# against the functions that accept it.
TEST_ON_TYPE = Optional[Union[str, List[str]]]


def _fetch_dataset(dataset: str) -> pd.DataFrame:
    """Load every raw batch of ``dataset`` into a single dataframe.

    All batches yielded by ``_iter_dataset_raw(dataset)`` are collected and
    concatenated. When no batch is yielded, an empty dataframe with the
    columns ``"id"`` and ``COL_DATAPOINT`` is used instead. In both cases the
    ``"id"`` column is renamed to ``"datapoint_id"`` before returning.

    :param dataset: name of the dataset passed through to ``_iter_dataset_raw``.
    :return: dataframe of datapoints with the ``"id"`` column renamed to
        ``"datapoint_id"``.
    """
    raw_batches = [batch for batch in _iter_dataset_raw(dataset)]
    if raw_batches:
        combined = pd.concat(raw_batches)
    else:
        # No data returned: keep the expected column layout on an empty frame.
        combined = pd.DataFrame(columns=["id", COL_DATAPOINT])
    return combined.rename(columns={"id": "datapoint_id"})


def _iter_result_raw(dataset: str, model: str, batch_size: int) -> Iterator[pd.DataFrame]:
validate_batch_size(batch_size)
init_request = LoadResultsRequest(dataset=dataset, model=model, batch_size=batch_size)
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
from pandas.testing import assert_frame_equal

from kolena._api.v2.dataset import CommitData
from kolena._experimental.dataset import fetch_dataset
from kolena._experimental.dataset import fetch_dataset_history
from kolena._experimental.dataset import register_dataset
from kolena.dataset import fetch_dataset
from kolena.dataset import fetch_dataset_history
from kolena.dataset import register_dataset
from kolena.errors import NotFoundError
from kolena.workflow.annotation import BoundingBox
from kolena.workflow.annotation import LabeledBoundingBox
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
import pytest
from pandas.testing import assert_frame_equal

from kolena._experimental.dataset import fetch_dataset
from kolena._experimental.dataset import fetch_results
from kolena._experimental.dataset import register_dataset
from kolena._experimental.dataset import test
from kolena.dataset import fetch_dataset
from kolena.dataset import fetch_results
from kolena.dataset import register_dataset
from kolena.dataset import test
from kolena.errors import IncorrectUsageError
from kolena.errors import NotFoundError
from tests.integration._experimental.dataset.test_dataset import batch_iterator
from tests.integration.dataset.test_dataset import batch_iterator
from tests.integration.helper import fake_locator
from tests.integration.helper import with_test_prefix

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
import pandas as pd
import pytest

from kolena._experimental.dataset.common import validate_dataframe_ids
from kolena._experimental.dataset.common import validate_id_fields
from kolena.dataset.common import validate_dataframe_ids
from kolena.dataset.common import validate_id_fields
from kolena.errors import InputValidationError


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@
from .data import a_text
from .data import b_text
from kolena._api.v2.dataset import EntityData
from kolena._experimental.dataset._dataset import _add_datatype
from kolena._experimental.dataset._dataset import _flatten_composite
from kolena._experimental.dataset._dataset import _infer_datatype
from kolena._experimental.dataset._dataset import _infer_datatype_value
from kolena._experimental.dataset._dataset import _infer_id_fields
from kolena._experimental.dataset._dataset import _to_deserialized_dataframe
from kolena._experimental.dataset._dataset import _to_serialized_dataframe
from kolena._experimental.dataset._dataset import DatapointType
from kolena._experimental.dataset._dataset import resolve_id_fields
from kolena._experimental.dataset.common import COL_DATAPOINT
from kolena._experimental.dataset.common import COL_RESULT
from kolena.dataset.common import COL_DATAPOINT
from kolena.dataset.common import COL_RESULT
from kolena.dataset.dataset import _add_datatype
from kolena.dataset.dataset import _flatten_composite
from kolena.dataset.dataset import _infer_datatype
from kolena.dataset.dataset import _infer_datatype_value
from kolena.dataset.dataset import _infer_id_fields
from kolena.dataset.dataset import _to_deserialized_dataframe
from kolena.dataset.dataset import _to_serialized_dataframe
from kolena.dataset.dataset import DatapointType
from kolena.dataset.dataset import resolve_id_fields
from kolena.errors import InputValidationError
from kolena.workflow._datatypes import DATA_TYPE_FIELD
from kolena.workflow.annotation import BoundingBox
Expand Down

0 comments on commit f861c84

Please sign in to comment.