-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathglobus-weblinks
executable file
·79 lines (67 loc) · 3.03 KB
/
globus-weblinks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#! /usr/bin/env python3
import argparse
from pathlib import Path, PurePath
import json
import requests
import globus_sdk
from urllib.parse import urlparse
# This is the tutorial client ID from
# https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html.
# Let's not bother to create our own.
CLIENT_ID = "61338d24-54d5-408f-a10d-66c06b59f6d2"
def get_transfer_token():
    """Interactively obtain an access token for the Globus Transfer API.

    Starts an OAuth2 flow on a native-app auth client created with
    CLIENT_ID, prints the authorization URL, reads the authorization
    code the user pastes on standard input, and exchanges that code
    for tokens.

    Returns:
        The "access_token" entry of the tokens issued for the
        "transfer.api.globus.org" resource server.
    """
    auth_client = globus_sdk.NativeAppAuthClient(CLIENT_ID)
    auth_client.oauth2_start_flow()

    # The user has to open this URL in a browser and log in there.
    authorize_url = auth_client.oauth2_get_authorize_url()
    print(f"Please go to this URL and login:\n\n{authorize_url}\n")

    pasted_code = input("Please enter the code you get after login here: ")
    token_response = auth_client.oauth2_exchange_code_for_tokens(
        pasted_code.strip())

    transfer_tokens = token_response.by_resource_server["transfer.api.globus.org"]
    return transfer_tokens["access_token"]
def find_files(transfer_client, endpoint_id, path=PurePath("/")):
    """Recursively yield the path of every non-directory entry under path.

    Each directory is listed with transfer_client.operation_ls(); entries
    whose "type" is "dir" are descended into as soon as they are met, and
    every other entry is yielded as path / name.

    Args:
        transfer_client: Object providing operation_ls(endpoint_id, path=...)
            whose result has a "DATA" list of entries with "type" and
            "name" keys.
        endpoint_id: ID of the endpoint to list.
        path: Directory to start from; defaults to the root.

    Yields:
        PurePath objects, in listing order (depth first).
    """
    listing = transfer_client.operation_ls(endpoint_id, path=str(path))
    for entry in listing["DATA"]:
        is_directory = entry["type"] == "dir"
        child = path / entry["name"]
        if not is_directory:
            yield child
            continue
        # Directory: emit everything below it before moving on.
        yield from find_files(transfer_client, endpoint_id, child)
def download_file(url, cookies):
    """Download URL to a local path mirroring the URL's path.

    The file is written relative to the current working directory, at
    the path component of the URL with its leading "/" removed.  The
    remote size is taken from the Content-Length header of a HEAD
    request and compared with the size of any existing local file to
    decide whether a download is needed.

    Args:
        url: URL of the file to download.
        cookies: Cookies passed along with every request.

    Returns:
        True if it was actually downloaded, or False if it was skipped
        because a local file of the same size already exists.

    Raises:
        requests.HTTPError: If the server answers the HEAD or GET
            request with a 4xx/5xx status.
        KeyError: If the HEAD response has no Content-Length header.
        Exception: If the local file is larger than the remote one.
    """
    filepath = Path(urlparse(url).path).relative_to("/")

    head = requests.head(url, cookies=cookies)
    # Fail on HTTP errors instead of trusting the size of an error page.
    head.raise_for_status()
    url_size = int(head.headers['content-length'])

    # Stat once; a missing file is simply "no local copy yet".
    try:
        local_size = filepath.stat().st_size
    except FileNotFoundError:
        local_size = None

    if local_size is not None:
        # If local file is larger than remote, something is wrong.
        if local_size > url_size:
            raise Exception("Local file is larger than remote. "
                            "Something is wrong, aborting. "
                            "Maybe your authentication cookies are invalid?")
        if local_size == url_size:
            return False

    # Local file does not exist or is smaller than remote: download it
    # again from scratch.
    filepath.parent.mkdir(parents=True, exist_ok=True)
    # The context manager releases the connection even if writing fails.
    with requests.get(url, cookies=cookies, stream=True) as response:
        # Check the status before opening the file so that an error
        # response neither truncates an existing partial file nor ends
        # up saved as the file's contents.
        response.raise_for_status()
        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024*1024):
                f.write(chunk)
    return True
# Command-line entry point: list every file of a Globus collection and
# download each one over the collection's HTTPS server.
parser = argparse.ArgumentParser(description="Get web links for Globus collection")
parser.add_argument("endpoint_id", metavar="endpoint-id", help="Endpoint ID of collection")
parser.add_argument("cookies", help="JSON file with cookies from Globus web app")
args = parser.parse_args()

# Read the cookies before anything interactive or slow, so that a wrong
# path or malformed JSON fails immediately rather than after the login
# and the full directory listing.
with open(args.cookies) as f:
    cookies = json.load(f)

transfer_client = globus_sdk.TransferClient(
    authorizer=globus_sdk.AccessTokenAuthorizer(get_transfer_token()))
endpoint = transfer_client.get_endpoint(args.endpoint_id)

# Without an HTTPS server there is no base URL to build download links
# from; stop with a clear message instead of a TypeError on str + None.
https_server = endpoint["https_server"]
if not https_server:
    raise SystemExit(
        f"Endpoint {args.endpoint_id} does not provide an HTTPS server; "
        "its files cannot be downloaded over HTTPS.")

urls = [https_server + str(path)
        for path in find_files(transfer_client, args.endpoint_id)]
total = len(urls)
print(f"Found {total} files")

for i, url in enumerate(urls, 1):
    if download_file(url, cookies):
        print(f"{i}/{total}: Downloaded {url}")
    else:
        print(f"{i}/{total}: Skipped {url}")
print("Download complete!")