diff --git a/.github/workflows/pytype.yml b/.github/workflows/pytype.yml new file mode 100644 index 0000000..937c7ec --- /dev/null +++ b/.github/workflows/pytype.yml @@ -0,0 +1,39 @@ + +name: PyType + +on: + push: + branches: ["dev"] + pull_request: + branches: ["dev", "master"] + workflow_dispatch: + +permissions: + contents: read + +jobs: + pytype: + name: Run PyType + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Create and start virtual environment + run: | + python -m venv venv + source venv/bin/activate + + - name: Install dependencies + run: | + pip install pytype + pip install . + + - name: Run pytype + run: | + pytype \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8d80134..2d07e78 100644 --- a/.gitignore +++ b/.gitignore @@ -142,7 +142,7 @@ build*/ *dist*/ dl/* -credentials/*.json +credentials/* !credentials/credentials_template.json queries/*.json !queries/query_template.json diff --git a/NGPIris/cli/__init__.py b/NGPIris/cli/__init__.py index 700a921..432d19e 100644 --- a/NGPIris/cli/__init__.py +++ b/NGPIris/cli/__init__.py @@ -19,14 +19,14 @@ def format_list(list_of_things : list) -> str: list_of_buckets = list(map(lambda s : s + "\n", list_of_things)) return "".join(list_of_buckets).strip("\n") -def _list_objects_generator(hcph : HCPHandler, name_only : bool) -> Generator[str, Any, None]: +def _list_objects_generator(hcph : HCPHandler, path : str, name_only : bool, files_only : bool) -> Generator[str, Any, None]: """ Handle object list as a paginator that `click` can handle. It works slightly different from `list_objects` in `hcp.py` in order to make the output printable in a terminal """ paginator : Paginator = hcph.s3_client.get_paginator("list_objects_v2") - pages : PageIterator = paginator.paginate(Bucket = hcph.bucket_name) + pages : PageIterator = paginator.paginate(Bucket = hcph.bucket_name, Prefix = path) (nb_of_cols, _) = get_terminal_size() max_width = floor(nb_of_cols / 5) if (not name_only): @@ -36,7 +36,11 @@ def _list_objects_generator(hcph : HCPHandler, name_only : bool) -> Generator[st tablefmt = "plain", stralign = "center" ) + "\n" + "-"*nb_of_cols + "\n" - for object in pages.search("Contents[?!ends_with(Key, '/')][]"): # filter objects that does not end with "/" + if files_only: + filter_string = "Contents[?!ends_with(Key, '/')][]" # filter objects that does not end with "/" + else: + filter_string = "Contents[*][]" + for object in pages.search(filter_string): if name_only: yield str(object["Key"]) + "\n" else: @@ -55,6 +59,11 @@ def _list_objects_generator(hcph : HCPHandler, name_only : bool) -> Generator[st def object_is_folder(object_path : str, hcph : HCPHandler) -> bool: return (object_path[-1] == "/") and (hcph.get_object(object_path)["ContentLength"] == 0) +def add_trailing_slash(path : str) -> str: + if not path[-1] == "/": + path += "/" + return path + @click.group() @click.argument("credentials") @click.version_option(package_name = "NGPIris") @@ -85,9 +94,13 @@ def upload(context : Context, bucket : str, source : str, destination : str): """ hcph : HCPHandler = get_HCPHandler(context) hcph.mount_bucket(bucket) + destination = add_trailing_slash(destination) if Path(source).is_dir(): + source = add_trailing_slash(source) hcph.upload_folder(source, destination) else: + file_name = Path(source).name + destination += file_name hcph.upload_file(source, destination) @cli.command() @@ -198,22 +211,52 @@ def 
list_buckets(context : Context): @cli.command() @click.argument("bucket") +@click.argument("path", required = False) @click.option( "-no", "--name-only", help = "Output only the name of the objects instead of all the associated metadata", - default = False + default = False, + is_flag = True +) +@click.option( + "-p", + "--pagination", + help = "Output as a paginator", + default = False, + is_flag = True +) +@click.option( + "-fo", + "--files-only", + help = "Output only file objects", + default = False, + is_flag = True ) @click.pass_context -def list_objects(context : Context, bucket : str, name_only : bool): +def list_objects(context : Context, bucket : str, path : str, name_only : bool, pagination : bool, files_only : bool): """ List the objects in a certain bucket/namespace on the HCP. BUCKET is the name of the bucket in which to list its objects. + + PATH is an optional argument for where to list the objects """ hcph : HCPHandler = get_HCPHandler(context) hcph.mount_bucket(bucket) - click.echo_via_pager(_list_objects_generator(hcph, name_only)) + if path: + path_with_slash = add_trailing_slash(path) + + if not hcph.object_exists(path_with_slash): + raise RuntimeError("Path does not exist") + else: + path_with_slash = "" + + if pagination: + click.echo_via_pager(_list_objects_generator(hcph, path_with_slash, name_only, files_only)) + else: + for obj in hcph.list_objects(path_with_slash, name_only, files_only): + click.echo(obj) @cli.command() @click.argument("bucket") @@ -222,23 +265,86 @@ def list_objects(context : Context, bucket : str, name_only : bool): "-cs", "--case_sensitive", help = "Use case sensitivity? Default value is False", - default = False + default = False, + is_flag = True +) +@click.option( + "-v", + "--verbose", + help = "Get a verbose output of files. Default value is False, since it might be slower", + default = False, + is_flag = True ) @click.pass_context -def simple_search(context : Context, bucket : str, search_string : str, case_sensitive : bool): +def simple_search(context : Context, bucket : str, search_string : str, case_sensitive : bool, verbose : bool): """ Make simple search using substrings in a bucket/namespace on the HCP. + NOTE: This command does not use the HCI. Instead, it uses a linear search of + all the objects in the HCP. As such, this search might be slow. + + BUCKET is the name of the bucket in which to make the search. + + SEARCH_STRING is any string that is to be used for the search. + """ + hcph : HCPHandler = get_HCPHandler(context) + hcph.mount_bucket(bucket) + list_of_results = hcph.search_in_bucket( + search_string, + name_only = (not verbose), + case_sensitive = case_sensitive + ) + click.echo("Search results:") + for result in list_of_results: + click.echo(result) + +@cli.command() +@click.argument("bucket") +@click.argument("search_string") +@click.option( + "-cs", + "--case_sensitive", + help = "Use case sensitivity? Default value is False", + default = False, + is_flag = True +) +@click.option( + "-v", + "--verbose", + help = "Get a verbose output of files. Default value is False, since it might be slower", + default = False, + is_flag = True +) +@click.option( + "-t", + "--threshold", + help = "Set the threshold for the fuzzy search score. Default value is 80", + default = 80 +) +@click.pass_context +def fuzzy_search(context : Context, bucket : str, search_string : str, case_sensitive : bool, verbose : bool, threshold : int): + """ + Make a fuzzy search using a search string in a bucket/namespace on the HCP. 
+ + NOTE: This command does not use the HCI. Instead, it uses the RapidFuzz + library in order to find objects in the HCP. As such, this search might + be slow. + BUCKET is the name of the bucket in which to make the search. SEARCH_STRING is any string that is to be used for the search. """ hcph : HCPHandler = get_HCPHandler(context) hcph.mount_bucket(bucket) - list_of_results = hcph.search_objects_in_bucket(search_string, case_sensitive) + list_of_results = hcph.fuzzy_search_in_bucket( + search_string, + name_only = (not verbose), + case_sensitive = case_sensitive, + threshold = threshold + ) click.echo("Search results:") for result in list_of_results: - click.echo("- " + result) + click.echo(result) @cli.command() @click.argument("bucket") diff --git a/NGPIris/hcp/hcp.py b/NGPIris/hcp/hcp.py index da80ba7..159480e 100644 --- a/NGPIris/hcp/hcp.py +++ b/NGPIris/hcp/hcp.py @@ -31,9 +31,13 @@ from json import dumps from parse import ( parse, - search, Result ) +from rapidfuzz import ( + fuzz, + process, + utils +) from requests import get from urllib3 import disable_warnings from tqdm import tqdm @@ -62,10 +66,15 @@ def __init__(self, credentials_path : str, use_ssl : bool = False, proxy_path : credentials_handler = CredentialsHandler(credentials_path) self.hcp = credentials_handler.hcp self.endpoint = "https://" + self.hcp["endpoint"] - tenant_parse = parse("https://{}.hcp1.vgregion.se", self.endpoint) - if type(tenant_parse) is Result: - self.tenant = str(tenant_parse[0]) - else: # pragma: no cover + + self.tenant = None + for endpoint_format_string in ["https://{}.ngp-fs1000.vgregion.se", "https://{}.ngp-fs2000.vgregion.se", "https://{}.ngp-fs3000.vgregion.se", "https://{}.vgregion.sjunet.org"]: + tenant_parse = parse(endpoint_format_string, self.endpoint) + if type(tenant_parse) is Result: + self.tenant = str(tenant_parse[0]) + break + + if not self.tenant: raise RuntimeError("Unable to parse endpoint. Make sure that you have entered the correct endpoint in your credentials JSON file. Hint: The endpoint should *not* contain \"https://\" or port numbers") self.base_request_url = self.endpoint + ":9090/mapi/tenants/" + self.tenant self.aws_access_key_id = self.hcp["aws_access_key_id"] @@ -168,7 +177,9 @@ def test_connection(self, bucket_name : str = "") -> dict: elif bucket_name: pass else: - raise RuntimeError("No bucket selected. Either use `mount_bucket` first or supply the optional `bucket_name` paramter for `test_connection`") + raise RuntimeError("No bucket selected. Either use `mount_bucket` first or supply the optional `bucket_name` parameter for `test_connection`") + + response = {} try: response = dict(self.s3_client.head_bucket(Bucket = bucket_name)) except EndpointConnectionError as e: # pragma: no cover @@ -225,7 +236,7 @@ def list_buckets(self) -> list[str]: return list_of_buckets @check_mounted - def list_objects(self, path_key : str = "", name_only : bool = False) -> Generator: + def list_objects(self, path_key : str = "", name_only : bool = False, files_only : bool = False) -> Generator: """ List all objects in the mounted bucket as a generator. If one wishes to get the result as a list, use :py:function:`list` to type cast the generator @@ -234,16 +245,35 @@ def list_objects(self, path_key : str = "", name_only : bool = False) -> Generat :type path_key: str, optional :param name_only: If True, yield only a the object names. If False, yield the full metadata about each object. Defaults to False. 
:type name_only: bool, optional + :param files_only: If true, only yield file objects. Defaults to False + :type files_only: bool, optional :yield: A generator of all objects in a bucket :rtype: Generator """ paginator : Paginator = self.s3_client.get_paginator("list_objects_v2") - pages : PageIterator = paginator.paginate(Bucket = self.bucket_name) - for object in pages.search("Contents[?starts_with(Key, '" + path_key + "')][]"): - if name_only: - yield str(object["Key"]) - else: - yield object + pages : PageIterator = paginator.paginate(Bucket = self.bucket_name, Prefix = path_key) + + if files_only: + filter_string = "Contents[?!ends_with(Key, '/')][]" + else: + filter_string = "Contents[*][]" + + split_path_key = len(path_key.split("/")) + 1 + + pages_filtered = pages.search(filter_string) + for object in pages_filtered: + # Split the object key by "/" + split_object = object["Key"].split("/") + # Check if the object is within the specified path_key depth + if len(split_object) <= split_path_key: + # Skip objects that are not at the desired depth + if (len(split_object) == split_path_key) and split_object[-1]: + continue + + if name_only: + yield str(object["Key"]) + else: + yield object @check_mounted def get_object(self, key : str) -> dict: @@ -373,6 +403,9 @@ def upload_file(self, local_file_path : str, key : str = "") -> None: file_name = Path(local_file_path).name key = file_name + if "\\" in local_file_path: + raise RuntimeError("The \"\\\" character is not allowed in the file path") + if self.object_exists(key): raise ObjectAlreadyExist("The object \"" + key + "\" already exist in the mounted bucket") else: @@ -421,25 +454,24 @@ def delete_objects(self, keys : list[str], verbose : bool = True) -> None: :type verbose: bool, optional """ object_list = [] + does_not_exist = [] for key in keys: - object_list.append({"Key" : key}) - - deletion_dict = {"Objects": object_list} + if self.object_exists(key): + object_list.append({"Key" : key}) + else: + does_not_exist.append(key) - response : dict = self.s3_client.delete_objects( - Bucket = self.bucket_name, - Delete = deletion_dict - ) - if verbose: - print(dumps(response, indent=4)) - - deleted_dict_list : list[dict] = response["Deleted"] - does_not_exist = [] - for deleted_dict in deleted_dict_list: - if not "VersionId" in deleted_dict: - does_not_exist.append("- " + key + "\n") - if does_not_exist: - print("The following could not be deleted because they didn't exist: \n" + "".join(does_not_exist)) + if object_list: + deletion_dict = {"Objects": object_list} + response : dict = self.s3_client.delete_objects( + Bucket = self.bucket_name, + Delete = deletion_dict + ) + if verbose: + print(dumps(response, indent=4)) + + if verbose and does_not_exist: + print("The following could not be deleted because they didn't exist: \n" + "\n".join(does_not_exist)) @check_mounted def delete_object(self, key : str, verbose : bool = True) -> None: @@ -456,51 +488,98 @@ def delete_object(self, key : str, verbose : bool = True) -> None: @check_mounted def delete_folder(self, key : str, verbose : bool = True) -> None: """ - Delete a folder of objects in the mounted bucket. If there are subfolders, a RuntimeError is raisesd + Delete a folder of objects in the mounted bucket. If there are subfolders, a RuntimeError is raised :param key: The folder of objects to be deleted :type key: str :param verbose: Print the result of the deletion. 
defaults to True :type verbose: bool, optional - :raises RuntimeError: If there are subfolders, a RuntimeError is raisesd + :raises RuntimeError: If there are subfolders, a RuntimeError is raised """ if key[-1] != "/": key += "/" - object_path_in_folder = [] - for s in self.search_objects_in_bucket(key): - parse_object = parse(key + "{}", s) - if type(parse_object) is Result: - object_path_in_folder.append(s) - - for object_path in object_path_in_folder: - if object_path[-1] == "/": + + objects : list[str] = list(self.list_objects(key, name_only = True)) + + if not objects: + raise RuntimeError("\"" + key + "\"" + " is not a valid path") #TODO: change this error + + for object_path in objects: + if (object_path[-1] == "/") and (not object_path == key): # `objects` might contain key, in which case everything is fine raise RuntimeError("There are subfolders in this folder. Please remove these first, before deleting this one") - self.delete_objects(object_path_in_folder + [key], verbose = verbose) + self.delete_objects(objects, verbose = verbose) @check_mounted - def search_objects_in_bucket(self, search_string : str, case_sensitive : bool = False) -> list[str]: + def search_in_bucket( + self, + search_string : str, + name_only : bool = True, + case_sensitive : bool = False + ) -> Generator: """ - Simple search method using substrings in order to find certain objects. Case insensitive by default. Does not utilise the HCI + Simple search method using exact substrings in order to find certain + objects. Case insensitive by default. Does not utilise the HCI :param search_string: Substring to be used in the search :type search_string: str + :param name_only: If True, yield only a the object names. If False, yield the full metadata about each object. Defaults to False. + :type name_only: bool, optional + :param case_sensitive: Case sensitivity. Defaults to False :type case_sensitive: bool, optional - :return: List of object names that match the in some way to the object names - :rtype: list[str] + :return: A generator of objects based on the search string + :rtype: Generator + """ + return self.fuzzy_search_in_bucket(search_string, name_only, case_sensitive, 100) + + @check_mounted + def fuzzy_search_in_bucket( + self, + search_string : str, + name_only : bool = True, + case_sensitive : bool = False, + threshold : int = 80 + ) -> Generator: """ - search_result : list[str] = [] - for key in self.list_objects(name_only = True): - parse_object = search( + Fuzzy search implementation based on the `RapidFuzz` library. + + :param search_string: Substring to be used in the search + :type search_string: str + + :param name_only: If True, yield only a the object names. If False, yield the full metadata about each object. Defaults to False. + :type name_only: bool, optional + + :param case_sensitive: Case sensitivity. Defaults to False + :type case_sensitive: bool, optional + + :param threshold: The fuzzy search similarity score. 
Defaults to 80 + :type threshold: int, optional + + :return: A generator of objects based on the search string + :rtype: Generator + """ + + if case_sensitive: + processor = None + else: + processor = utils.default_process + + if not name_only: + full_list = list(self.list_objects()) + + for item, score, index in process.extract_iter( search_string, - key, - case_sensitive = case_sensitive - ) - if type(parse_object) is Result: - search_result.append(key) - return search_result + self.list_objects(name_only = True), + scorer = fuzz.partial_ratio, + processor = processor + ): + if score >= threshold: + if name_only: + yield item + else: + yield full_list[index] # type: ignore @check_mounted def get_object_acl(self, key : str) -> dict: diff --git a/README.md b/README.md index 03ef204..a8b19ae 100644 --- a/README.md +++ b/README.md @@ -149,116 +149,8 @@ Commands: test-connection Test the connection to a bucket/namespace. upload Upload files to an HCP bucket/namespace. ``` +Each sub-command has its own help message and is displayed by `iris path/to/your/credentials.json sub-command --help` -#### The `delete-folder` command -``` -Usage: iris CREDENTIALS delete-folder [OPTIONS] FOLDER BUCKET - - Delete a folder from an HCP bucket/namespace. - - FOLDER is the name of the folder to be deleted. - - BUCKET is the name of the bucket where the folder to be deleted exist. - -Options: - --help Show this message and exit. -``` - -#### The `delete-object` command -``` -Usage: iris CREDENTIALS delete-object [OPTIONS] OBJECT BUCKET - - Delete an object from an HCP bucket/namespace. - - OBJECT is the name of the object to be deleted. - - BUCKET is the name of the bucket where the object to be deleted exist. - -Options: - --help Show this message and exit. -``` - -#### The `download` command -``` -Usage: iris CREDENTIALS download [OPTIONS] OBJECT BUCKET LOCAL_PATH - - Download files from an HCP bucket/namespace. - - OBJECT is the name of the object to be downloaded. - - BUCKET is the name of the upload destination bucket. - - LOCAL_PATH is the path to where the downloaded objects are to be stored - locally. - -Options: - --help Show this message and exit. -``` - -#### The `list-buckets` command -``` -Usage: iris CREDENTIALS list-buckets [OPTIONS] - - List the available buckets/namespaces on the HCP. - -Options: - --help Show this message and exit. -``` - -#### The `list-objects` command -``` -Usage: iris CREDENTIALS list-objects [OPTIONS] BUCKET - - List the objects in a certain bucket/namespace on the HCP. - - BUCKET is the name of the bucket in which to list its objects. - -Options: - -no, --name-only BOOLEAN Output only the name of the objects instead of all - the associated metadata - --help Show this message and exit. -``` - -#### The `simple-search` command -``` -Usage: iris CREDENTIALS simple-search [OPTIONS] BUCKET SEARCH_STRING - - Make simple search using substrings in a bucket/namespace on the HCP. - - BUCKET is the name of the bucket in which to make the search. - - SEARCH_STRING is any string that is to be used for the search. - -Options: - -cs, --case_sensitive BOOLEAN Use case sensitivity? - --help Show this message and exit. -``` - -#### The `test-connection` command -``` -Usage: iris CREDENTIALS test-connection [OPTIONS] BUCKET - - Test the connection to a bucket/namespace. - - BUCKET is the name of the bucket for which a connection test should be made. - -Options: - --help Show this message and exit. 
-``` - -#### The `upload` command -``` -Usage: iris CREDENTIALS upload [OPTIONS] FILE_OR_FOLDER BUCKET - - Upload files to an HCP bucket/namespace. - - FILE-OR-FOLDER is the path to the file or folder of files to be uploaded. - - BUCKET is the name of the upload destination bucket. - -Options: - --help Show this message and exit. -``` ## Testing Assuming that the repository has been cloned, run the following tests: ```shell diff --git a/docs/Tutorial.md b/docs/Tutorial.md index 944daa0..5d84be9 100644 --- a/docs/Tutorial.md +++ b/docs/Tutorial.md @@ -15,6 +15,7 @@ - [The `HCPHandler` class](#the-hcphandler-class) - [Example use cases](#example-use-cases-1) - [Listing buckets/namespaces](#listing-bucketsnamespaces-1) + - [Listing objects in a bucket/namespace](#listing-objects-in-a-bucketnamespace) - [Downloading a file](#downloading-a-file-1) - [Uploading a file](#uploading-a-file-1) - [Uploading a folder](#uploading-a-folder) @@ -27,13 +28,16 @@ - [Look up information of an index](#look-up-information-of-an-index) - [Make queries](#make-queries) --- + +This tutorial was updated for `NGPIris 5.1`. + ## Introduction -IRIS 5 is a complete overhaul of the previous versions of IRIS, mainly in terms of its codebase. The general functionality like download from and upload to the HCP are still here, but might differ from previous versions from what you are used to. This document will hopefully shed some light on what you (the user) can expect and how your workflow with IRIS might change in comparison to previous versions of IRIS. +IRIS 5 is a complete overhaul of the previous versions of IRIS, mainly in terms of its codebase. The general functionality like download from and upload to the HCP are still here, but might differ from previous versions from what you are used to. This document will hopefully shed some light on what users can expect and how your workflow with IRIS might change in comparison to previous versions of IRIS. IRIS 5, like previous versions of IRIS, consists of two main parts: a Python package and an associated Command Line Interface (CLI), which are described below. ## CLI -IRIS 5 features a CLI like recent versions of IRIS. However, the new CLI is a bit different compared to before; the general structure of subcommands for the `iris` command are totally different, but it still has the subcommands you would come to expect. A new command, `iris_generate_credentials_file`, has also been added. It will generate an empty credentials file that can be filled in with your own NGPr credentials. +IRIS 5 features a CLI like recent versions of IRIS. However, the new CLI is a bit different compared to before; the general structure of subcommands for the `iris` command are vastly different, but it still has the subcommands you would come to expect. A new, separate, command called `iris_generate_credentials_file` has also been added. It will generate an empty credentials file that can be filled in with your own NGPr credentials. ### The `iris` command @@ -52,7 +56,7 @@ Options: Commands: delete-folder Delete a folder from an HCP bucket/namespace. delete-object Delete an object from an HCP bucket/namespace. - download Download files from an HCP bucket/namespace. + download Download a file or folder from an HCP bucket/namespace. list-buckets List the available buckets/namespaces on the HCP. list-objects List the objects in a certain bucket/namespace on the... simple-search Make simple search using substrings in a... 
@@ -61,42 +65,13 @@ Commands: ``` * `delete-folder`: Deletes a folder on the HCP * `delete-object`: Deletes an object on the HCP -* `download`: - * Downloads a file from a bucket/namespace on the HCP - * `iris path/to/credentials.json download --help`: - * ```cmd - Usage: iris CREDENTIALS download [OPTIONS] OBJECT BUCKET LOCAL_PATH - - Download files from an HCP bucket/namespace. - - OBJECT is the name of the object to be downloaded. - - BUCKET is the name of the upload destination bucket. - - LOCAL_PATH is the path to where the downloaded objects are to be stored - locally. - - Options: - --help Show this message and exit. - ``` +* `download`: Downloads a file or folder from a bucket/namespace on the HCP * `list-buckets`: Lists all buckets that the user is allowed to see * `list-objects`: Lists all objects that the user is allowed to see * `simple-search`: Performs a simple search using a substring in order to find matching objects in a bucket/namespace -* `upload`: - * Uploads either a file or a folder to a bucket/namespace on the HCP - * `iris path/to/credentials.json upload --help`: - * ```cmd - Usage: iris CREDENTIALS upload [OPTIONS] FILE_OR_FOLDER BUCKET - - Upload files to an HCP bucket/namespace. - - FILE-OR-FOLDER is the path to the file or folder of files to be uploaded. +* `test-connection`: Used for testing your connection to the HCP +* `upload`: Uploads a file or a folder to a bucket/namespace on the HCP - BUCKET is the name of the upload destination bucket. - - Options: - --help Show this message and exit. - ``` #### Example use cases The following subsections contain examples of simple use cases for IRIS 5. Of course, correct paths and bucket names should be replaced for your circumstances. ##### Listing buckets/namespaces @@ -105,11 +80,11 @@ iris path/to/your/credentials.json list-buckets ``` ##### Downloading a file ```shell -iris path/to/your/credentials.json download path/to/your/file/on/the/bucket the_name_of_the_bucket path/on/your/local/machine +iris path/to/your/credentials.json download the_name_of_the_bucket path/to/your/file/in/the/bucket path/on/your/local/machine ``` ##### Uploading a file ```shell -iris path/to/your/credentials.json upload destination/path/on/the/bucket the_name_of_the_bucket path/to/your/file/on/your/local/machine +iris path/to/your/credentials.json upload the_name_of_the_bucket destination/path/in/the/bucket path/to/your/file/on/your/local/machine ``` ##### Searching for a file By default, the `simple-search` command is case insensitive: @@ -122,11 +97,11 @@ iris path/to/your/credentials.json simple-search --case_sensitive True the_name_ ``` ##### Delete a file ```shell -iris path/to/your/credentials.json delete-object path/to/your/file/on/the/bucket the_name_of_the_bucket +iris path/to/your/credentials.json delete-object the_name_of_the_bucket path/to/your/file/in/the/bucket ``` ##### Delete a folder ```shell -iris path/to/your/credentials.json delete-folder path/to/your/folder/on/the/bucket/ the_name_of_the_bucket +iris path/to/your/credentials.json delete-folder the_name_of_the_bucket path/to/your/folder/on/the/bucket/ ``` ### The `iris_generate_credentials_file` command @@ -140,8 +115,9 @@ Usage: iris_generate_credentials_file [OPTIONS] plaintext. Options: - --path TEXT Path for where to put the new credentials file - --name TEXT Custom name for the credentials file + --path TEXT Path for where to put the new credentials file. + --name TEXT Custom name for the credentials file. Will filter out + everything after a "." 
character, if any exist. --help Show this message and exit. ``` Simply running `iris_generate_credentials_file` will generate a blank credentials file (which is just a JSON file) like the following: @@ -184,6 +160,7 @@ hcp_h = HCPHandler("credentials.json") hcp_h.mount_bucket("myBucket") ``` + #### Example use cases ##### Listing buckets/namespaces There is no need for mounting a bucket when listing all available buckets. However, credentials are still needed. As such, we can list all buckets with the following: @@ -194,6 +171,20 @@ hcp_h = HCPHandler("credentials.json") print(hcp_h.list_buckets()) ``` +##### Listing objects in a bucket/namespace +Since there might be many objects in a given bucket, a regular Python list would be memory inefficient. As such, a `Generator` is returned instead. Since `Generator`s are lazy objects, if we want to explicitly want all the objects we must first cast it to a `list` +```python +from NGPIris.hcp import HCPHandler + +hcp_h = HCPHandler("credentials.json") + +hcp_h.mount_bucket("myBucket") + +objects_generator = hcp_h.list_objects() + +print(list(objects_generator)) +``` + ##### Downloading a file ```python from NGPIris.hcp import HCPHandler diff --git a/pyproject.toml b/pyproject.toml index e281fc7..3b98b1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "NGPIris" -version = "5.1.0" +version = "5.2.0" readme = "README.md" dependencies = [ "requests >= 2.31.0", "urllib3 == 1.26.19", - "requests >= 2.31.0", "boto3 >= 1.26.76", "parse >= 1.19.1", + "RapidFuzz >= 3.10.1", "tqdm >= 4.66.2", "click >= 8.1.7", "bitmath == 1.3.3.1", @@ -32,6 +32,10 @@ inputs = ["NGPIris"] pythonpath = [ "." ] +testpaths = [ + "tests" +] +addopts = "--strict-markers" # Adds command-line options filterwarnings = "ignore::urllib3.connectionpool.InsecureRequestWarning" [project.scripts] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0c9f6aa --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,67 @@ + +from pytest import Config, fixture, UsageError +from configparser import ConfigParser +from typing import Any, Generator +from shutil import rmtree + +from NGPIris.hcp import HCPHandler + +class CustomConfig: + """A typed wrapper around pytest.Config for dynamic attributes.""" + def __init__(self, pytest_config : Config): + self._config = pytest_config + + @property + def hcp_h(self) -> HCPHandler: + """Access the HCPHandler instance.""" + return getattr(self._config, "hcp_h") + + def __getattr__(self, name : str) -> Any: + """Dynamically get attributes set during pytest configuration.""" + return getattr(self._config, name) + +def set_section(config : Config, parser : ConfigParser, section : str) -> None: + parse_dict = dict(parser.items(section)) + for k, v in parse_dict.items(): + setattr(config, k, v) # Adds attributes dynamically to pytest.Config + +def pytest_addoption(parser) -> None: + parser.addoption( + "--config", + action="store", + default=None, + help="Path to the configuration file (e.g., path/to/config.ini)", + ) + +def pytest_configure(config : Config) -> None: + config_path = config.getoption("--config") + if not config_path: + raise UsageError("--config argument is required.") + else: + parser = ConfigParser() + parser.read(str(config_path)) + + # Add the INI parser to config + setattr(config, "parser", parser) + + # Dynamically add an HCPHandler instance to config + setattr(config, "hcp_h", HCPHandler(parser.get("General", "credentials_path"))) + + # Dynamically add all key-value pairs from 
"HCP_tests" section + set_section(config, parser, "HCP_tests") + +@fixture(scope = "session") +def hcp_result_path(pytestconfig : Config) -> str: + return pytestconfig.parser.get("HCP_tests", "result_path") # type: ignore + +@fixture(scope = "session", autouse = True) +def clean_up_after_tests(hcp_result_path : str) -> Generator[None, Any, None]: + # Setup code can go here if needed + yield + # Teardown code + rmtree(hcp_result_path) + +@fixture +def custom_config(pytestconfig : Config) -> CustomConfig: + """Provide the typed wrapper for pytest.Config.""" + return CustomConfig(pytestconfig) diff --git a/tests/test_conf_template.ini b/tests/test_conf_template.ini index 9bd1287..2d3933f 100644 --- a/tests/test_conf_template.ini +++ b/tests/test_conf_template.ini @@ -1,3 +1,7 @@ -[hcp_tests] -bucket = -data_test_file = 80MB_test_file \ No newline at end of file +[General] +credentials_path = + +[HCP_tests] +test_bucket = +test_file_path = tests/data/80MB_test_file +result_path = tests/data/results/ \ No newline at end of file diff --git a/tests/test_hci.py b/tests/test_hci.py index dd64a8a..a9f7982 100644 --- a/tests/test_hci.py +++ b/tests/test_hci.py @@ -1,10 +1,14 @@ +from configparser import ConfigParser from NGPIris.hci import HCIHandler from random import randint from json import dump from os import remove -hci_h = HCIHandler("credentials/testCredentials.json") +ini_config = ConfigParser() +ini_config.read("tests/test_conf.ini") + +hci_h = HCIHandler(ini_config.get("General", "credentials_path")) hci_h.request_token() def test_list_index_names_type() -> None: diff --git a/tests/test_hcp.py b/tests/test_hcp.py index 8fac4ba..3802405 100644 --- a/tests/test_hcp.py +++ b/tests/test_hcp.py @@ -1,216 +1,213 @@ -from typing import Callable -from NGPIris.hcp import HCPHandler -from configparser import ConfigParser + from pathlib import Path -from shutil import rmtree from filecmp import cmp +from typing import Any, Callable -hcp_h = HCPHandler("credentials/testCredentials.json") - -ini_config = ConfigParser() -ini_config.read("tests/test_conf.ini") - -test_bucket = ini_config.get("hcp_tests", "bucket") +from conftest import CustomConfig -test_file = ini_config.get("hcp_tests","data_test_file") -test_file_path = "tests/data/" + test_file +from NGPIris.hcp import HCPHandler -result_path = "tests/data/results/" +# --------------------------- Helper fucntions --------------------------------- -def _without_mounting(test : Callable) -> None: +def _without_mounting(hcp_h : HCPHandler, hcp_h_method : Callable[..., Any]) -> None: + hcp_h.bucket_name = None try: - test() + hcp_h_method(hcp_h) except: assert True else: # pragma: no cover assert False -def test_list_buckets() -> None: - assert hcp_h.list_buckets() +# --------------------------- Test suite --------------------------------------- + +def test_list_buckets(custom_config : CustomConfig) -> None: + assert custom_config.hcp_h.list_buckets() -def test_mount_bucket() -> None: - hcp_h.mount_bucket(test_bucket) +def test_mount_bucket(custom_config : CustomConfig) -> None: + custom_config.hcp_h.mount_bucket(custom_config.test_bucket) -def test_mount_nonexisting_bucket() -> None: +def test_mount_nonexisting_bucket(custom_config : CustomConfig) -> None: try: - hcp_h.mount_bucket("aBucketThatDoesNotExist") + custom_config.hcp_h.mount_bucket("aBucketThatDoesNotExist") except: assert True else: # pragma: no cover assert False -def test_test_connection() -> None: - test_mount_bucket() - hcp_h.test_connection() +def test_test_connection(custom_config : 
CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.test_connection() -def test_test_connection_with_bucket_name() -> None: - hcp_h.test_connection(bucket_name = test_bucket) +def test_test_connection_with_bucket_name(custom_config : CustomConfig) -> None: + custom_config.hcp_h.test_connection(bucket_name = custom_config.test_bucket) -def test_test_connection_without_mounting_bucket() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - try: - _hcp_h.test_connection() - except: - assert True - else: # pragma: no cover - assert False +def test_test_connection_without_mounting_bucket(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.test_connection) -def test_list_objects() -> None: - test_mount_bucket() - assert type(list(hcp_h.list_objects())) == list +def test_list_objects(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + assert type(list(custom_config.hcp_h.list_objects())) == list -def test_list_objects_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.list_objects) +def test_list_objects_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.list_objects) -def test_upload_file() -> None: - test_mount_bucket() - hcp_h.upload_file(test_file_path) +def test_upload_file(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.upload_file(custom_config.test_file_path) -def test_upload_file_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.upload_file) +def test_upload_file_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.upload_file) -def test_upload_file_in_sub_directory() -> None: - test_mount_bucket() - hcp_h.upload_file(test_file_path, "a_sub_directory/a_file") +def test_upload_file_in_sub_directory(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.upload_file(custom_config.test_file_path, "a_sub_directory/a_file") -def test_upload_nonexistent_file() -> None: - test_mount_bucket() +def test_upload_nonexistent_file(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) try: - hcp_h.upload_file("tests/data/aTestFileThatDoesNotExist") + custom_config.hcp_h.upload_file("tests/data/aTestFileThatDoesNotExist") except: assert True else: # pragma: no cover assert False -def test_upload_folder() -> None: - test_mount_bucket() - hcp_h.upload_folder("tests/data/a folder of data/", "a folder of data/") +def test_upload_folder(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.upload_folder("tests/data/a folder of data/", "a folder of data/") -def test_upload_folder_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.upload_folder) +def test_upload_folder_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.upload_folder) -def test_upload_nonexisting_folder() -> None: - test_mount_bucket() +def test_upload_nonexisting_folder(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) try: - hcp_h.upload_folder("tests/data/aFolderOfFilesThatDoesNotExist") + 
custom_config.hcp_h.upload_folder("tests/data/aFolderOfFilesThatDoesNotExist") except: assert True else: # pragma: no cover assert False -def test_get_file() -> None: - test_mount_bucket() - assert hcp_h.object_exists("a_sub_directory/a_file") - assert hcp_h.get_object("a_sub_directory/a_file") - -def test_get_folder_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.object_exists) - _without_mounting(_hcp_h.get_object) - -def test_get_file_in_sub_directory() -> None: - test_mount_bucket() - assert hcp_h.object_exists(test_file) - assert hcp_h.get_object(test_file) - -def test_download_file() -> None: - test_mount_bucket() - Path(result_path).mkdir() - hcp_h.download_file(test_file, result_path + test_file) - assert cmp(result_path + test_file, test_file_path) - -def test_download_file_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.download_file) - -def test_download_nonexistent_file() -> None: - test_mount_bucket() +def test_get_file(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + assert custom_config.hcp_h.object_exists("a_sub_directory/a_file") + assert custom_config.hcp_h.get_object("a_sub_directory/a_file") + +def test_get_folder_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.object_exists) + _without_mounting(_hcp_h, HCPHandler.get_object) + +def test_get_file_in_sub_directory(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + test_file = Path(custom_config.test_file_path).name + assert custom_config.hcp_h.object_exists(test_file) + assert custom_config.hcp_h.get_object(test_file) + +def test_download_file(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + Path(custom_config.result_path).mkdir() + test_file = Path(custom_config.test_file_path).name + custom_config.hcp_h.download_file(test_file, custom_config.result_path + test_file) + assert cmp(custom_config.result_path + test_file, custom_config.test_file_path) + +def test_download_file_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.download_file) + +def test_download_nonexistent_file(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) try: - hcp_h.download_file("aFileThatDoesNotExist", result_path + "aFileThatDoesNotExist") + custom_config.hcp_h.download_file("aFileThatDoesNotExist", custom_config.result_path + "aFileThatDoesNotExist") except: assert True else: # pragma: no cover assert False -def test_download_folder() -> None: - test_mount_bucket() - hcp_h.download_folder("a folder of data/", result_path) +def test_download_folder(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.download_folder("a folder of data/", custom_config.result_path) + +def test_search_objects_in_bucket(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + test_file = Path(custom_config.test_file_path).name + custom_config.hcp_h.search_objects_in_bucket(test_file) -def test_search_objects_in_bucket() -> None: - test_mount_bucket() - hcp_h.search_objects_in_bucket(test_file) +def test_search_objects_in_bucket_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.search_objects_in_bucket) -def 
test_search_objects_in_bucket_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.search_objects_in_bucket) +def test_get_object_acl(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + test_file = Path(custom_config.test_file_path).name + custom_config.hcp_h.get_object_acl(test_file) -def test_get_object_acl() -> None: - test_mount_bucket() - hcp_h.get_object_acl(test_file) +def test_get_object_acl_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.get_object_acl) -def test_get_object_acl_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.get_object_acl) +def test_get_bucket_acl(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.get_bucket_acl() -def test_get_bucket_acl() -> None: - test_mount_bucket() - hcp_h.get_bucket_acl() +def test_get_bucket_acl_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.get_bucket_acl) -def test_get_bucket_acl_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.get_bucket_acl) +# ------------------ Possibly future ACL tests --------------------------------- -#def test_modify_single_object_acl() -> None: -# test_mount_bucket() -# hcp_h.modify_single_object_acl() +#def test_modify_single_object_acl(custom_config : CustomConfig) -> None: +# test_mount_bucket(custom_config) +# custom_config.hcp_h.modify_single_object_acl() # -#def test_modify_single_bucket_acl() -> None: -# test_mount_bucket() -# hcp_h.modify_single_bucket_acl() +#def test_modify_single_bucket_acl(custom_config : CustomConfig) -> None: +# test_mount_bucket(custom_config) +# custom_config.hcp_h.modify_single_bucket_acl() # -#def test_modify_object_acl() -> None: -# test_mount_bucket() -# hcp_h.modify_object_acl() +#def test_modify_object_acl(custom_config : CustomConfig) -> None: +# test_mount_bucket(custom_config) +# custom_config.hcp_h.modify_object_acl() # -#def test_modify_bucket_acl() -> None: -# test_mount_bucket() -# hcp_h.modify_bucket_acl() - -def test_delete_file() -> None: - test_mount_bucket() - hcp_h.delete_object(test_file) - hcp_h.delete_object("a_sub_directory/a_file") - hcp_h.delete_object("a_sub_directory") - -def test_delete_file_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.delete_object) - -def test_delete_folder_with_sub_directory() -> None: - test_mount_bucket() - hcp_h.upload_file(test_file_path, "a folder of data/a sub dir/a file") +#def test_modify_bucket_acl(custom_config : CustomConfig) -> None: +# test_mount_bucket(custom_config) +# custom_config.hcp_h.modify_bucket_acl() + +# ------------------------------------------------------------------------------ + +def test_delete_file(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + test_file = Path(custom_config.test_file_path).name + custom_config.hcp_h.delete_object(test_file) + custom_config.hcp_h.delete_object("a_sub_directory/a_file") + custom_config.hcp_h.delete_object("a_sub_directory") + +def test_delete_file_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.delete_object) + +def test_delete_folder_with_sub_directory(custom_config : CustomConfig) -> 
None: + test_mount_bucket(custom_config) + custom_config.hcp_h.upload_file(custom_config.test_file_path, "a folder of data/a sub dir/a file") try: - hcp_h.delete_folder("a folder of data/") + custom_config.hcp_h.delete_folder("a folder of data/") except: assert True else: # pragma: no cover assert False - hcp_h.delete_folder("a folder of data/a sub dir/") - -def test_delete_folder() -> None: - test_mount_bucket() - hcp_h.delete_folder("a folder of data/") + custom_config.hcp_h.delete_folder("a folder of data/a sub dir/") -def test_delete_folder_without_mounting() -> None: - _hcp_h = HCPHandler("credentials/testCredentials.json") - _without_mounting(_hcp_h.delete_folder) +def test_delete_folder(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.delete_folder("a folder of data/") -def test_delete_nonexistent_files() -> None: - hcp_h.delete_objects(["some", "files", "that", "does", "not", "exist"]) +def test_delete_folder_without_mounting(custom_config : CustomConfig) -> None: + _hcp_h = custom_config.hcp_h + _without_mounting(_hcp_h, HCPHandler.delete_folder) -def test_clean_up() -> None: - rmtree(result_path) \ No newline at end of file +def test_delete_nonexistent_files(custom_config : CustomConfig) -> None: + test_mount_bucket(custom_config) + custom_config.hcp_h.delete_objects(["some", "files", "that", "does", "not", "exist"])
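
For reference, a minimal usage sketch of the new listing and search methods introduced in this diff (`list_objects` with `files_only`, `search_in_bucket`, and `fuzzy_search_in_bucket`). This is illustrative only: the credentials file, bucket name, prefix, and search strings below are placeholders, not values taken from the diff.

```python
from NGPIris.hcp import HCPHandler

# Placeholder credentials file and bucket name -- replace with your own.
hcp_h = HCPHandler("credentials.json")
hcp_h.mount_bucket("myBucket")

# List only file objects (keys not ending in "/") under an assumed prefix.
for key in hcp_h.list_objects("some/prefix/", name_only = True, files_only = True):
    print(key)

# Exact-substring search; yields matching object names as a generator.
for key in hcp_h.search_in_bucket("sample", name_only = True):
    print(key)

# Fuzzy search backed by RapidFuzz; only matches scoring at or above the
# given threshold are yielded.
for key in hcp_h.fuzzy_search_in_bucket("sampel", name_only = True, threshold = 75):
    print(key)
```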