Skip to content

Commit

Permalink
Add logging when registering a dataset (#370)
Browse files Browse the repository at this point in the history
* Add logging when registering a dataset

* Update message formatting

* Apply formatting rules to all messages

* Address PR comments
  • Loading branch information
sandalns authored Dec 22, 2023
1 parent 75772c5 commit 8bc69fa
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 2 deletions.
5 changes: 5 additions & 0 deletions kolena/_experimental/dataset/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@
from kolena._experimental.dataset.common import validate_batch_size
from kolena._experimental.dataset.common import validate_dataframe_ids
from kolena._utils import krequests_v2 as krequests
from kolena._utils import log
from kolena._utils.batched_load import _BatchedLoader
from kolena._utils.batched_load import init_upload
from kolena._utils.batched_load import upload_data_frame
from kolena._utils.consts import BatchSize
from kolena._utils.endpoints import get_dataset_url
from kolena._utils.serde import from_dict
from kolena._utils.state import API_V2
from kolena.errors import InputValidationError
Expand Down Expand Up @@ -259,6 +261,8 @@ def register_dataset(
request = RegisterRequest(name=name, id_fields=id_fields, uuid=load_uuid)
response = krequests.post(Path.REGISTER, json=asdict(request))
krequests.raise_for_status(response)
data = from_dict(EntityData, response.json())
log.info(f"Successfully registered dataset '{name}' ({get_dataset_url(dataset_id=data.id)})")


def _iter_dataset_raw(
Expand Down Expand Up @@ -300,6 +304,7 @@ def fetch_dataset(
"""
Fetch an entire dataset given its name.
"""
log.info(f"Loaded dataset '{name}'")
df_batches = list(_iter_dataset(name, commit, batch_size))
return pd.concat(df_batches, ignore_index=True) if df_batches else pd.DataFrame()

Expand Down
5 changes: 3 additions & 2 deletions kolena/_experimental/dataset/_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def fetch_results(
"""
Fetch results given dataset name and model name.
"""
log.info(f"Fetching results for model '{model}' on dataset '{dataset}'")
df = _fetch_results(dataset, model)

df_datapoints = _to_deserialized_dataframe(df.drop_duplicates(subset=[COL_DATAPOINT]), column=COL_DATAPOINT)
Expand All @@ -128,7 +129,7 @@ def fetch_results(
_to_deserialized_dataframe(df_matched, column=COL_RESULT),
),
)

log.info(f"Fetched results for model '{model}' on dataset '{dataset}'")
return df_datapoints, df_results_by_eval


Expand Down Expand Up @@ -191,4 +192,4 @@ def test(
upload_data_frame(df=df_results, batch_size=BatchSize.UPLOAD_RECORDS.value, load_uuid=load_uuid)

_upload_results(model, load_uuid, existing_dataset.id)
log.info(f"uploaded test results for model {model} on dataset {dataset}")
log.info(f"Uploaded test results for model '{model}' on dataset '{dataset}'")
9 changes: 9 additions & 0 deletions kolena/_utils/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ def _get_test_suite_url(client_state: _ClientState, test_suite_id: int) -> str:
return f"{platform_url}/testing?{urlencode(dict(testSuiteId=test_suite_id))}"


def _get_dataset_url(client_state: _ClientState, dataset_id: int) -> str:
    """
    Build the platform URL for a dataset.

    :param client_state: client state handed to `_get_platform_url` to resolve the platform base URL.
    :param dataset_id: ID of the dataset; emitted as the `datasetId` query parameter. Annotated as `int`
        to agree with the public `get_dataset_url` wrapper, which forwards an `int` ID here.
    :return: a URL of the form `<platform_url>/datasets?datasetId=<dataset_id>`.
    """
    platform_url = _get_platform_url(client_state)
    # urlencode takes care of query-string escaping of the ID value.
    query = urlencode(dict(datasetId=dataset_id))
    return f"{platform_url}/datasets?{query}"


def get_dataset_url(dataset_id: int) -> str:
    """
    Return the URL for the dataset with the given ID.

    Delegates to `_get_dataset_url`, supplying the client state obtained from `get_client_state()`.

    :param dataset_id: ID of the dataset to build the URL for.
    :return: the URL string produced by `_get_dataset_url`.
    """
    client_state = get_client_state()
    return _get_dataset_url(client_state, dataset_id)


def get_model_url(model_id: int) -> str:
    """
    Return the URL for the model with the given ID.

    Delegates to `_get_model_url`, supplying the client state obtained from `get_client_state()`.

    :param model_id: ID of the model to build the URL for.
    :return: the URL string produced by `_get_model_url`.
    """
    client_state = get_client_state()
    return _get_model_url(client_state, model_id)

Expand Down

0 comments on commit 8bc69fa

Please sign in to comment.