forked from datalad/datalad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcopy_urls_from_datalad.py
executable file
·61 lines (55 loc) · 2.3 KB
/
copy_urls_from_datalad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Little helper to copy all URLs which were mistakenly submitted to datalad
remote instead of straight to web.
It could later be refactored into a helper function if the need arises again.
"""
from collections import defaultdict
from datalad.support.annexrepo import AnnexRepo
from datalad import lgr
from tqdm import tqdm
def get_remote_urls(rec, remote):
    """Return the URLs recorded for `remote` in a single whereis record.

    Parameters
    ----------
    rec : dict
      Mapping whose values are dicts which may carry a 'description'
      and a 'urls' entry (presumably one value per remote, as produced by
      ``AnnexRepo.whereis(..., output='full')`` -- confirm against caller).
    remote : str
      Remote name to look for.  A description matches if it equals either
      the bare name or the name wrapped in square brackets (``[name]``).

    Returns
    -------
    list
      The 'urls' value of the first matching entry, or an empty list if
      that entry has no 'urls' or no entry matches at all.
    """
    accepted_descriptions = [remote, '[%s]' % remote]
    # Only the per-remote values matter here; the keys are not used.
    for v in rec.values():
        if v.get('description', '') in accepted_descriptions:
            return v.get('urls', [])
    return []
if __name__ == '__main__':
    # Work on the repository in the current directory without creating or
    # initializing anything.
    annex = AnnexRepo('.', create=False, init=False)
    # key -> urls which are known to the datalad remote but not yet to web
    urls_to_register = defaultdict(list)
    try:
        # enable datalad special remote
        annex.call_annex(["enableremote", "datalad"])
        # go through each and see where urls aren't yet under web
        # seems might have also --in=datalad to restrict
        w = annex.whereis([], options=['--all'], output='full')
        lgr.info("Got %d entries", len(w))
        for k, rec in tqdm(w.items()):
            datalad_urls = get_remote_urls(rec, 'datalad')
            web_urls = set(get_remote_urls(rec, 'web'))
            for url in datalad_urls:
                if url in web_urls:
                    # already registered with web -- nothing to do
                    continue
                if 'openneuro.s3' in url or 'openfmri.s3' in url:
                    urls_to_register[k].append(url)
                else:
                    # Anything else is only reported, never registered.
                    lgr.warning("Found unexpected url %s", url)
    finally:
        # disable datalad special remote
        annex.remove_remote("datalad")  # need to disable it first

    lgr.info(
        "Got %d entries which could get new urls",
        len(urls_to_register)
    )
    for k, urls in tqdm(urls_to_register.items()):
        for url in urls:
            # annex.alwayscommit=false defers the commit of each individual
            # registration; the "merge" call below is what makes annex
            # commit them all.
            annex.call_annex([
                "registerurl", '-c', 'annex.alwayscommit=false', k, url])
    # to cause annex to commit all the changes
    annex.call_annex(["merge"])
    annex.gc(allow_background=False)