Skip to content

Commit

Permalink
Merge pull request #55 from materials-data-facility/toolbox-dev
Browse files Browse the repository at this point in the history
Toolbox v0.5.5
  • Loading branch information
jgaff authored Nov 13, 2020
2 parents f8822a3 + 3076e03 commit 4ec8154
Show file tree
Hide file tree
Showing 16 changed files with 1,463 additions and 1,488 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
language: python
python:
- '3.4'
- '3.5'
- '3.6'
- '3.7'
- '3.8'
Expand Down
15 changes: 10 additions & 5 deletions mdf_toolbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# F401: Imported but unused - needed for import convenience
# F403: Star import - Don't need to enumerate all tools
from .toolbox import * # noqa: F401,F403
from .search_helper import SearchHelper # noqa: F401
from .sub_helpers import * # noqa: F401,F403
# F401: Imported but unused
from .auth import anonymous_login, confidential_login, login, logout # noqa: F401
from .filesystem import posixify_path, uncompress_tree # noqa: F401
from .globus_search.utils import format_gmeta, gmeta_pop, translate_index # noqa: F401
from .globus_search.search_helper import SearchHelper # noqa: F401
from .globus_search.sub_helpers import AggregateHelper # noqa: F401
from .globus_transfer import custom_transfer, globus_check_directory, quick_transfer # noqa: F401
from .json_dict import (dict_merge, flatten_json, insensitive_comparison, # noqa: F401
prettify_json, translate_json) # noqa: F401
from .jsonschema import condense_jsonschema, expand_jsonschema, prettify_jsonschema # noqa: F401
from .version import __version__ # noqa: F401
226 changes: 226 additions & 0 deletions mdf_toolbox/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
from fair_research_login import NativeClient
from globus_nexus_client import NexusClient
import globus_sdk


# *************************************************
# * Authentication utilities
# *************************************************

# Short service names mapped to the Globus Auth scope strings they stand for.
# login() and confidential_login() translate requested names through this table
# (unknown names are passed through unchanged and treated as raw scopes).
KNOWN_SCOPES = {
"transfer": "urn:globus:auth:scope:transfer.api.globus.org:all",
"search": "urn:globus:auth:scope:search.api.globus.org:search",
"search_ingest": "urn:globus:auth:scope:search.api.globus.org:all",
"data_mdf": "urn:globus:auth:scope:data.materialsdatafacility.org:all",
"mdf_connect": "https://auth.globus.org/scopes/c17f27bb-f200-486a-b785-2a25e82af505/connect",
"petrel": "https://auth.globus.org/scopes/56ceac29-e98a-440a-a594-b41e7a084b62/all",
"groups": "urn:globus:auth:scope:nexus.api.globus.org:groups",
"dlhub": "https://auth.globus.org/scopes/81fc4156-a623-47f2-93ad-7184118226ba/auth",
"funcx": "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
}
# Client classes that can be instantiated for a service.
# Each supported service is listed twice - once under its full scope string and
# once under its short name - so a lookup works with either form.
# Services in KNOWN_SCOPES without an entry here only ever yield an authorizer.
KNOWN_CLIENTS = {
KNOWN_SCOPES["transfer"]: globus_sdk.TransferClient,
"transfer": globus_sdk.TransferClient,
KNOWN_SCOPES["search"]: globus_sdk.SearchClient,
"search": globus_sdk.SearchClient,
KNOWN_SCOPES["search_ingest"]: globus_sdk.SearchClient,
"search_ingest": globus_sdk.SearchClient,
KNOWN_SCOPES["groups"]: NexusClient,
"groups": NexusClient
}
# Fallback app name used by login() and logout() when the caller gives none.
DEFAULT_APP_NAME = "UNNAMED_APP"
# Fallback client ID used by login() and logout() when the caller gives none
# (described in their docstrings as the MDF Native Client's ID).
DEFAULT_CLIENT_ID = "984464e2-90ab-433d-8145-ac0215d26c8e"
# Passed as ``http_timeout`` to every client created in this module.
# NOTE(review): presumably expressed in seconds - confirm against the SDK.
STD_TIMEOUT = 5 * 60 # 5 minutes


def anonymous_login(services):
    """Create service clients without authenticating to Globus Auth.

    Note:
        Clients may have reduced functionality without authentication.

    Arguments:
        services (str or list of str): The services to initialize clients for.
                A single string is treated as a one-element list.

    Returns:
        dict: The clients that could be created, indexed by service name.
              Services that failed are reported on stdout and left out.
    """
    requested = [services] if isinstance(services, str) else services

    clients = {}
    for name in requested:
        try:
            client = KNOWN_CLIENTS[name](http_timeout=STD_TIMEOUT)
        except KeyError:
            # Service has no entry in the client table
            print("Error: No known client for '{}' service.".format(name))
        except Exception:
            # Client construction itself failed, most likely an auth requirement
            print("Error: Unable to create client for '{}' service.\n"
                  "Anonymous access may not be allowed.".format(name))
        else:
            clients[name] = client

    return clients


def confidential_login(services, client_id, client_secret, make_clients=True):
    """Log in to Globus services as a confidential client.

    A confidential client has its own login information, i.e. it is NOT a human's account.

    Arguments:
        services (str or list of str): Services to authenticate with. Each entry may be
                a known service name (a key of ``KNOWN_SCOPES``), a raw scope string,
                or several of either separated by whitespace.
        client_id (str): The ID of the client.
        client_secret (str): The client's secret for authentication.
        make_clients (bool): If ``True``, will make and return appropriate clients
                with generated tokens.
                If ``False``, will only return authorizers.
                **Default**: ``True``.

    Returns:
        dict: The clients and authorizers requested, indexed by service name.
              Scopes for which no authorizer could be created are reported
              on stdout and omitted.
    """
    if isinstance(services, str):
        services = [services]

    conf_client = globus_sdk.ConfidentialAppAuthClient(client_id, client_secret)

    # Normalize every entry and break whitespace-separated lists apart.
    # split() without an argument drops empty fragments, so repeated spaces or
    # tabs between names never produce an empty "scope".
    servs = []
    for serv in services:
        servs += serv.lower().split()
    # Translate services into scopes as possible; unknown names pass through as scopes
    scopes = [KNOWN_SCOPES.get(sc, sc) for sc in servs]

    # Make authorizers for each scope requested
    all_authorizers = {}
    for scope in scopes:
        # TODO: Allow non-CC authorizers?
        try:
            all_authorizers[scope] = globus_sdk.ClientCredentialsAuthorizer(conf_client, scope)
        except Exception as e:
            print("Error: Cannot create authorizer for scope '{}' ({})".format(scope, str(e)))

    returnables = {}
    # Process authorizers (rename keys to originals, make clients)
    for scope, auth in all_authorizers.items():
        if scope in servs:
            # User specified scope directly
            key = scope
        else:
            # User specified a known service name: map the scope back to that name,
            # falling back to the scope itself if no name matches
            key = next((name for name, known in KNOWN_SCOPES.items() if known == scope), scope)

        if make_clients and scope in KNOWN_CLIENTS:
            # User wants clients and a client class is available for this scope
            returnables[key] = KNOWN_CLIENTS[scope](authorizer=auth, http_timeout=STD_TIMEOUT)
        else:
            # Returning authorizer only
            returnables[key] = auth

    return returnables


def login(services, make_clients=True, clear_old_tokens=False, **kwargs):
    """Log in to Globus services.

    Arguments:
        services (str or list of str): The service names or scopes to authenticate to.
                Each entry may also hold several names/scopes separated by whitespace.
        make_clients (bool): If ``True``, will make and return appropriate clients with
                generated tokens. If ``False``, will only return authorizers.
                **Default**: ``True``.
        clear_old_tokens (bool): Force a login flow, even if loaded tokens are valid.
                Same effect as ``force``. If one of these is ``True``, the effect triggers.
                **Default**: ``False``.

    Keyword Arguments:
        app_name (str): Name of the app/script/client. Used for the named grant during
                consent, and the local server browser page by default.
                **Default**: ``'UNNAMED_APP'`` (``DEFAULT_APP_NAME``).
        client_id (str): The ID of the client registered with Globus at
                https://developers.globus.org
                **Default**: The MDF Native Client's ID (``DEFAULT_CLIENT_ID``).
        no_local_server (bool): Disable spinning up a local server to automatically
                copy-paste the auth code. THIS IS REQUIRED if you are on a remote server.
                When used locally with no_local_server=False, the domain is localhost with
                a randomly chosen open port number.
                **Default**: ``False``.
        no_browser (bool): Do not automatically open the browser for the Globus Auth URL.
                Display the URL instead and let the user navigate to that location manually.
                **Default**: ``False``.
        refresh_tokens (bool): Use Globus Refresh Tokens to extend login time.
                **Default**: ``True``.
        force (bool): Force a login flow, even if loaded tokens are valid.
                Same effect as ``clear_old_tokens``. If one of these is ``True``, the effect
                triggers. **Default**: ``False``.

    Returns:
        dict: The clients and authorizers requested, indexed by service name.
              For example, if ``login()`` is told to auth with ``'search'``
              then the search client will be in the ``'search'`` field.
    """
    if isinstance(services, str):
        services = [services]
    # Set up arg defaults (empty/None values fall back to the module defaults)
    app_name = kwargs.get("app_name") or DEFAULT_APP_NAME
    client_id = kwargs.get("client_id") or DEFAULT_CLIENT_ID

    native_client = NativeClient(client_id=client_id, app_name=app_name)

    # Normalize every entry and break whitespace-separated lists apart.
    # split() without an argument drops empty fragments, so repeated spaces or
    # tabs between names never produce an empty "scope".
    servs = []
    for serv in services:
        servs += serv.lower().split()
    # Translate known services into scopes; unknown names pass through as scopes
    scopes = [KNOWN_SCOPES.get(sc, sc) for sc in servs]

    native_client.login(requested_scopes=scopes,
                        no_local_server=kwargs.get("no_local_server", False),
                        no_browser=kwargs.get("no_browser", False),
                        refresh_tokens=kwargs.get("refresh_tokens", True),
                        force=clear_old_tokens or kwargs.get("force", False))

    all_authorizers = native_client.get_authorizers_by_scope(requested_scopes=scopes)
    returnables = {}
    # Process authorizers (rename keys to originals, make clients)
    for scope, auth in all_authorizers.items():
        if scope in servs:
            # User specified scope directly
            key = scope
        else:
            # User specified a known service name: map the scope back to that name,
            # falling back to the scope itself if no name matches
            key = next((name for name, known in KNOWN_SCOPES.items() if known == scope), scope)

        if make_clients and scope in KNOWN_CLIENTS:
            # User wants clients and a client class is available for this scope
            returnables[key] = KNOWN_CLIENTS[scope](authorizer=auth, http_timeout=STD_TIMEOUT)
        else:
            # Returning authorizer only
            returnables[key] = auth

    return returnables


def logout(app_name=None, client_id=None):
    """Revoke and delete all saved tokens for the app.

    Arguments:
        app_name (str): Name of the app/script/client.
                **Default**: ``DEFAULT_APP_NAME``.
        client_id (str): The ID of the client.
                **Default**: ``DEFAULT_CLIENT_ID``.
    """
    # Any falsy value (None, empty string) selects the module default
    client = NativeClient(app_name=app_name or DEFAULT_APP_NAME,
                          client_id=client_id or DEFAULT_CLIENT_ID)
    client.logout()
77 changes: 77 additions & 0 deletions mdf_toolbox/filesystem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os
from pathlib import PureWindowsPath
import re
import shutil


# *************************************************
# * Filesystem utilities
# *************************************************

def posixify_path(path: str) -> str:
    """Ensure that a path is in POSIX format.

    Windows paths are converted to POSIX style,
    where the "Drive" is listed as the
    first folder (e.g., ``/c/Users/globus_user/``).

    Arguments:
        path (str): Input path. Treated as a Windows path when it starts with a
                drive letter (either case), a colon, and a backslash.

    Returns:
        str: Rectified path. Paths not recognized as Windows paths are
             returned unchanged.
    """
    # Drive letters are case-insensitive on Windows, so accept both cases;
    # the drive is lowercased in the output either way.
    is_windows = re.match(r'[A-Za-z]:\\', path) is not None
    if is_windows:
        ppath = PureWindowsPath(path)
        # as_posix() yields "C:/dir/file"; drop the leading "C:" and prepend "/c"
        return '/{0}{1}'.format(ppath.drive[:1].lower(), ppath.as_posix()[2:])
    return path  # Nothing to do for POSIX paths


def uncompress_tree(root, delete_archives=False):
    """Uncompress all archives under a given directory.

    Every file is handed to ``shutil.unpack_archive``; files it does not
    recognize as an archive are silently skipped.
    Archives will be extracted to a sibling directory named after the archive
    (minus its last extension).
    This process can be slow, depending on the number and size of archives.

    Note:
        Archives are unpacked as-is, without inspecting member paths.
        Do not run this on archives from untrusted sources.

    Arguments:
        root (str): The path to the starting (root) directory.
        delete_archives (bool): If ``True``, will delete extracted archive files.
                If ``False``, will preserve archive files.
                **Default**: ``False``.

    Returns:
        dict: Results of the operation.
            * **success** (*bool*) - Always ``True`` when the function returns.
            * **num_extracted** (*int*) - Number of archives extracted.
            * **files_errored** (*list of str*) - The files that threw an unexpected
              exception when extracted.
    """
    num_extracted = 0
    error_files = []
    # Paths already handed to unpack_archive. A target directory that existed
    # before extraction is reached both by os.walk and through extract_dirs;
    # without this guard its archives would be extracted and counted twice.
    attempted = set()
    # Start list of dirs to extract with root
    # Later, add newly-created dirs with extracted files, because os.walk will miss them
    extract_dirs = [os.path.abspath(os.path.expanduser(root))]
    while extract_dirs:
        for path, _dirs, files in os.walk(extract_dirs.pop()):
            for filename in files:
                archive_path = os.path.join(path, filename)
                if archive_path in attempted:
                    continue
                attempted.add(archive_path)
                # Extract my_archive.tar to sibling dir my_archive
                extracted_files_dir = os.path.join(path, os.path.splitext(filename)[0])
                try:
                    shutil.unpack_archive(archive_path, extracted_files_dir)
                except shutil.ReadError:
                    # ReadError means is not an (extractable) archive
                    continue
                except Exception:
                    error_files.append(archive_path)
                    continue
                num_extracted += 1
                # Add new dir to list of dirs to process
                extract_dirs.append(extracted_files_dir)
                if delete_archives:
                    os.remove(archive_path)
    return {
        "success": True,
        "num_extracted": num_extracted,
        "files_errored": error_files
    }
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import globus_sdk

# Importing whole package for portability - this module can be copied directly somewhere else
import mdf_toolbox


Expand Down Expand Up @@ -423,7 +424,7 @@ def _mapping(self):
dict: The full mapping for the index.
"""
return (self.__search_client.get(
"/unstable/index/{}/mapping".format(mdf_toolbox.translate_index(self.index)))
"/beta/index/{}/mapping".format(mdf_toolbox.translate_index(self.index)))
["mappings"])

# ************************************************************************************
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import warnings

from mdf_toolbox.search_helper import SearchHelper, SEARCH_LIMIT
from .search_helper import SearchHelper, SEARCH_LIMIT


class AggregateHelper(SearchHelper):
Expand Down
Loading

0 comments on commit 4ec8154

Please sign in to comment.