diff --git a/CHANGELOG.md b/CHANGELOG.md
index c365f51..75244bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 ## ?.?.? - Unreleased

+* New commands `datasets delete-version`, `datasets delete-edition`, and
+  `datasets delete-distribution` for deleting dataset versions, editions, and
+  distributions respectively.
 * More robust dataset/version/edition URI parsing.

 ## 4.2.0 - 2024-06-18
diff --git a/doc/datasets.md b/doc/datasets.md
index 703fbba..5688fa9 100644
--- a/doc/datasets.md
+++ b/doc/datasets.md
@@ -24,8 +24,8 @@ To explore datasets in Okdata you can use the following commands:

 ```bash
 okdata datasets ls
-okdata datasets ls <datasetid>
-okdata datasets ls <datasetid> <versionid>
+okdata datasets ls <dataset_id>
+okdata datasets ls <dataset_id> <version>
 ```

 To start exploring the datasets in Okdata you do not need to log in, but based on the permissions set on each dataset you might get different lists.
@@ -112,13 +112,13 @@ File: `version.json`

 Create a new dataset version by piping the contents of `version.json`:

 ```bash
-cat version.json | okdata datasets create-version <datasetid>
+cat version.json | okdata datasets create-version <dataset_id>
 ```

 Or create it by referencing the file:
 ```bash
-okdata datasets create-version <datasetid> --file=version.json
+okdata datasets create-version <dataset_id> --file=version.json
 ```

 ## Create edition
@@ -134,11 +134,11 @@ File: `edition.json`
 ```
 Create the dataset version edition by piping the contents of `edition.json`:
 ```bash
-cat edition.json | okdata datasets create-edition <datasetid>
+cat edition.json | okdata datasets create-edition <dataset_id>
 ```
 Or create it by referencing the file:
 ```bash
-okdata datasets create-edition <datasetid> --file=edition.json
+okdata datasets create-edition <dataset_id> --file=edition.json
 ```

 ## Upload file to edition
@@ -148,12 +148,12 @@ hello, world
 world, hello
 ```

-Upload the file with the `cp` command to the `<datasetid>` dataset. Note the
+Upload the file with the `cp` command to the `<dataset_id>` dataset. Note the
 `ds:` prefix for the target dataset.

 To upload a file to a specific version and edition:
 ```bash
-okdata datasets cp /tmp/test.txt ds:<datasetid>/<versionid>/<editionid>
+okdata datasets cp /tmp/test.txt ds:<dataset_id>/<version>/<edition>
 ```

 By using the special edition ID `latest`, the file will be uploaded to the
@@ -163,13 +163,13 @@ If no version or edition is provided, a new edition will be created for the
 latest version automatically:

 ```bash
-okdata datasets cp /tmp/test.txt ds:<datasetid>
+okdata datasets cp /tmp/test.txt ds:<dataset_id>
 ```

 Or to upload to a new edition of a specific version:

 ```bash
-okdata datasets cp /tmp/test.txt ds:<datasetid>/<versionid>
+okdata datasets cp /tmp/test.txt ds:<dataset_id>/<version>
 ```

 ### Inspecting the upload status
@@ -178,11 +178,11 @@ After uploading a file to a dataset using the `okdata datasets cp` command, a
 trace ID is displayed which can be used to track the uploading process status:

 ```text
-+-------------+---------------+-----------+-------------+
-| Dataset     | Local file    | Uploaded? | Trace ID    |
-+-------------+---------------+-----------+-------------+
-| <datasetid> | /tmp/test.txt | Yes       | <trace_id>  |
-+-------------+---------------+-----------+-------------+
++--------------+---------------+-----------+-------------+
+| Dataset      | Local file    | Uploaded? | Trace ID    |
++--------------+---------------+-----------+-------------+
+| <dataset_id> | /tmp/test.txt | Yes       | <trace_id>  |
++--------------+---------------+-----------+-------------+
 ```

 To see the latest status of the upload, run:
@@ -218,14 +218,14 @@ echo "Uploaded file is processed and ready to be consumed"
 ```

 The `okdata datasets cp` command can also be used to download data from a dataset URI:

 ```bash
-okdata datasets cp ds:<datasetid>/<versionid>/<editionid> my/target/directory
+okdata datasets cp ds:<dataset_id>/<version>/<edition> my/target/directory
 ```

 If no version or edition is provided, the latest version and edition will be used by default (if they exist):

 ```bash
-okdata datasets cp ds:<datasetid> my/target/directory
+okdata datasets cp ds:<dataset_id> my/target/directory
 ```

 The target directory will be created if it doesn't already exist on the local filesystem. The CLI also supports the use of `.` to specify the current working directory as output target.
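As a quick illustration of the documented upload and download commands above (not part of the patch; the dataset, version, and edition identifiers below are hypothetical), the placeholders might be filled in like this:

```bash
# Upload to an explicit version and edition (hypothetical identifiers).
okdata datasets cp /tmp/test.txt ds:my-dataset/1/20240101T120000

# Download from the latest version and edition into a local directory.
okdata datasets cp ds:my-dataset my/target/directory
```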
diff --git a/okdata/cli/commands/datasets/datasets.py b/okdata/cli/commands/datasets/datasets.py
index 2718bd1..ec78778 100644
--- a/okdata/cli/commands/datasets/datasets.py
+++ b/okdata/cli/commands/datasets/datasets.py
@@ -19,9 +19,12 @@ class DatasetsCommand(BaseCommand):
         okdata datasets ls [options]
         okdata datasets cp [options]
         okdata datasets create [options]
-        okdata datasets create-version <datasetid> [options]
-        okdata datasets create-edition <datasetid> [<versionid>] [options]
-        okdata datasets create-distribution <datasetid> [<versionid> <editionid>] [options]
+        okdata datasets create-version <dataset_id> [options]
+        okdata datasets create-edition <dataset_id> [<version>] [options]
+        okdata datasets create-distribution <dataset_id> [<version> <edition>] [options]
+        okdata datasets delete-version <version_id> [--cascade] [options]
+        okdata datasets delete-edition <edition_id> [--cascade] [options]
+        okdata datasets delete-distribution <distribution_id> [options]

     Examples:
         okdata datasets ls
@@ -60,6 +63,12 @@ def handler(self):
             self.create_edition()
         elif self.cmd("create-distribution"):
             self.create_distribution()
+        elif self.cmd("delete-version"):
+            self.delete_version()
+        elif self.cmd("delete-edition"):
+            self.delete_edition()
+        elif self.cmd("delete-distribution"):
+            self.delete_distribution()
         else:
             self.help()

@@ -134,7 +143,7 @@ def version_information(self, dataset_id, version):
         self.print(f"Editions available for: {dataset_id}, version: {version}", out)

     def create_version(self):
-        dataset_id = self.arg("datasetid")
+        dataset_id = self.arg("dataset_id")
         payload = read_json(self.opt("file"))
         self.log.info(
             f"Creating version for dataset: {dataset_id} with payload: {payload}"
@@ -142,7 +151,7 @@ def create_version(self):
         version = self.sdk.create_version(dataset_id, payload)
         version_id = version["Id"]
-        self.log.info(f"Created version: {version_id} on dataset:  {dataset_id}")
+        self.log.info(f"Created version: {version_id} on dataset: {dataset_id}")
         self.print(f"Created version: {version_id}", version)

     def _get_latest_version(self, dataset_id, exit_on_error=True):
@@ -163,18 +172,34 @@ def _get_latest_version(self, dataset_id, exit_on_error=True):
                 return None
             raise

-    def resolve_or_load_versionid(self, dataset_id):
-        self.log.info(f"Trying to resolve versionid for {dataset_id}")
-        version_id = self.arg("versionid")
-        if version_id is not None:
-            self.log.info(f"Found version in arguments: {version_id}")
-            return version_id
+    def resolve_or_load_version(self, dataset_id):
+        self.log.info(f"Trying to resolve version for {dataset_id}")
+        version = self.arg("version")
+        if version is not None:
+            self.log.info(f"Found version in arguments: {version}")
+            return version

         latest_version = self._get_latest_version(dataset_id)
         self.log.info(
             f"Found version in latest dataset version: {latest_version['version']}"
         )
         return latest_version["version"]

+    def delete_version(self):
+        version_id = self.arg("version_id")
+        cascade = self.opt("cascade")
+        self.log.info(f"Deleting version {version_id} [cascade: {bool(cascade)}]")
+        try:
+            dataset_id, version = version_id.split("/")
+        except ValueError:
+            sys.exit("Version ID must be on the format 'dataset_id/version'.")
+        self.sdk.delete_version(dataset_id, version, cascade)
+        self.print(
+            "Deleted version {}{}.".format(
+                version,
+                " and every child edition and distribution" if cascade else "",
+            )
+        )
+
     # #################################### #
     # Edition                              #
     # #################################### #
{latest_version['version']}" ) return latest_version["version"] + def delete_version(self): + version_id = self.arg("version_id") + cascade = self.opt("cascade") + self.log.info(f"Deleting version {version_id} [cascade: {bool(cascade)}]") + try: + dataset_id, version = version_id.split("/") + except ValueError: + sys.exit("Version ID must be on the format 'dataset_id/version'.") + self.sdk.delete_version(dataset_id, version, cascade) + self.print( + "Deleted version {}{}.".format( + version, + " and every child edition and distribution" if cascade else "", + ) + ) + # #################################### # # Edition # #################################### # @@ -197,24 +222,24 @@ def edition_information(self, dataset_id, version, edition): def create_edition(self): payload = read_json(self.opt("file")) - dataset_id = self.arg("datasetid") - version_id = self.resolve_or_load_versionid(dataset_id) + dataset_id = self.arg("dataset_id") + version = self.resolve_or_load_version(dataset_id) self.log.info( - f"Creating edition for {version_id} on {dataset_id} with payload: {payload}" + f"Creating edition for {version} on {dataset_id} with payload: {payload}" ) - edition = self.sdk.create_edition(dataset_id, version_id, payload) - self.print(f"Created edition for {version_id} on {dataset_id}", edition) + edition = self.sdk.create_edition(dataset_id, version, payload) + self.print(f"Created edition for {version} on {dataset_id}", edition) return edition - def resolve_or_create_edition(self, dataset_id, version_id): - self.log.info(f"Trying to resolve edition for {version_id} on {dataset_id}") - edition_id = self.arg("editionid") - if edition_id is not None: - self.log.info(f"Found edition in arguments: {edition_id}") - return edition_id + def resolve_or_create_edition(self, dataset_id, version): + self.log.info(f"Trying to resolve edition for {version} on {dataset_id}") + edition = self.arg("edition") + if edition is not None: + self.log.info(f"Found edition in arguments: {edition}") + return edition - return self.sdk.auto_create_edition(dataset_id, version_id) + return self.sdk.auto_create_edition(dataset_id, version) def get_latest_or_create_edition(self, dataset_id, version): self.log.info(f"Resolving edition for dataset-uri: {dataset_id}/{version}") @@ -225,23 +250,51 @@ def get_latest_or_create_edition(self, dataset_id, version): return self.sdk.auto_create_edition(dataset_id, version) raise + def delete_edition(self): + edition_id = self.arg("edition_id") + cascade = self.opt("cascade") + self.log.info(f"Deleting edition {edition_id} [cascade: {bool(cascade)}]") + try: + dataset_id, version, edition = edition_id.split("/") + except ValueError: + sys.exit("Edition ID must be on the format 'dataset_id/version/edition'.") + self.sdk.delete_edition(dataset_id, version, edition, cascade) + self.print( + "Deleted edition {}{}.".format( + edition, + " and every child distribution" if cascade else "", + ) + ) + # #################################### # # Distribution # #################################### # def create_distribution(self): payload = read_json(self.opt("file")) - dataset_id = self.arg("datasetid") - version_id = self.resolve_or_load_versionid(dataset_id) - edition_id = self.resolve_or_create_edition(dataset_id, version_id)["Id"] + dataset_id = self.arg("dataset_id") + version = self.resolve_or_load_version(dataset_id) + edition_id = self.resolve_or_create_edition(dataset_id, version)["Id"] edition = edition_id.split("/")[-1] self.log.info(f"Creating distribution for {edition_id} with 
payload: {payload}") distribution = self.sdk.create_distribution( - dataset_id, version_id, edition, payload + dataset_id, version, edition, payload ) self.print(f"Created distribution for {edition_id}", distribution) return distribution + def delete_distribution(self): + dist_id = self.arg("distribution_id") + self.log.info(f"Deleting distribution {dist_id}") + try: + dataset_id, version, edition, dist = dist_id.split("/") + except ValueError: + sys.exit( + "Distribution ID must be on the format 'dataset_id/version/edition/distribution'." + ) + self.sdk.delete_distribution(dataset_id, version, edition, dist) + self.print(f"Deleted distribution {dist_id}") + # #################################### # # File handling # #################################### # diff --git a/requirements.txt b/requirements.txt index 5c6a3fe..f35b361 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile @@ -24,7 +24,7 @@ jsonschema==4.4.0 # via okdata-sdk jwcrypto==1.5.6 # via python-keycloak -okdata-sdk==3.1.1 +okdata-sdk==3.2.0 # via okdata-cli (setup.py) packaging==24.0 # via deprecation diff --git a/setup.py b/setup.py index 8b490aa..bf36897 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ install_requires=[ "PrettyTable", "docopt", - "okdata-sdk>=3.1.1,<4", + "okdata-sdk>=3.2,<4", "questionary>=1.10.0,<2.0.0", "requests", ],