-
-
Notifications
You must be signed in to change notification settings - Fork 42
/
bandcamp-downloader.py
executable file
·591 lines (536 loc) · 25.2 KB
/
bandcamp-downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
#!/usr/bin/env python3
import argparse
import datetime
import glob
import html
import http
import json
import os
import re
import sys
import time
import urllib.parse
import traceback
import zipfile
from concurrent.futures import ThreadPoolExecutor
# These require pip installs
from bs4 import BeautifulSoup, SoupStrainer
from curl_cffi import requests
import browser_cookie3
from tqdm import tqdm
USER_URL = 'https://bandcamp.com/{}'
COLLECTION_POST_URL = 'https://bandcamp.com/api/fancollection/1/collection_items'
HIDDEN_POST_URL = 'https://bandcamp.com/api/fancollection/1/hidden_items'
FILENAME_REGEX = re.compile('filename\\*=UTF-8\'\'(.*)')
SANITIZE_PATH_WINDOWS_REGEX = re.compile(r'[<>:"/|?*\\]')
CONFIG = {
'VERBOSE' : False,
'OUTPUT_DIR' : None,
'BROWSER' : None,
'FORMAT' : None,
'FORCE' : False,
'TQDM' : None,
'MAX_URL_ATTEMPTS' : 5,
'URL_RETRY_WAIT' : 5,
'POST_DOWNLOAD_WAIT' : 1,
'SINCE' : None,
}
MAX_THREADS = 32
DEFAULT_THREADS = 5
DEFAULT_FILENAME_FORMAT = os.path.join('{artist}', '{artist} - {title}')
FORMAT_EXTENSIONS = {
'aac-hi': '.m4a',
'aiff-lossless': '.aiff',
'alac': '.m4a',
'flac': '.flac',
'mp3-320': '.mp3',
'mp3-v0': '.mp3',
'vorbis': '.ogg',
'wav': '.wav'
}
SUPPORTED_BROWSERS = [
'firefox',
'chrome',
'chromium',
'brave',
'opera',
'edge'
]
TRACK_INFO_KEYS = [
'item_id',
'artist',
'title'
]
def main() -> int:
parser = argparse.ArgumentParser(description = 'Download your collection from bandcamp. Requires a logged in session in a supported browser so that the browser cookies can be used to authenticate with bandcamp. Albums are saved into directories named after their artist. Already existing albums will have their file size compared to what is expected and re-downloaded if the sizes differ. Otherwise already existing albums will not be re-downloaded.')
parser.add_argument('username', type=str, help='Your bandcamp username, as it appears at the end of your bandcamp collection url, I.E. bandcamp.com/user_name')
parser.add_argument(
'--browser', '-b',
type=str,
default = 'firefox',
choices = SUPPORTED_BROWSERS,
help='The browser whose cookies to use for accessing bandcamp. Defaults to "firefox"'
)
parser.add_argument(
'--cookies', '-c',
type=str,
help='Path to a cookie file. First, we will try to use it as a mozilla cookie jar. If that fails, it\'ll be used as the path for your given browser\'s cookie store.',
)
parser.add_argument(
'--directory', '-d',
default = os.getcwd(),
help='The directory to download albums to. Defaults to the current directory.'
)
parser.add_argument(
'--filename-format',
default = DEFAULT_FILENAME_FORMAT,
help='The filename format for downloaded tracks. Default is \'{}\'. All placeholders: {}'.format(DEFAULT_FILENAME_FORMAT, ', '.join(TRACK_INFO_KEYS))
)
parser.add_argument(
'--format', '-f',
default = 'mp3-320',
choices = FORMAT_EXTENSIONS.keys(),
help = 'What format to download the songs in. Default is \'mp3-320\'.'
)
parser.add_argument(
'--parallel-downloads', '-p',
type = int,
default = DEFAULT_THREADS,
help = 'How many threads to use for parallel downloads. Set to \'1\' to disable parallelism. Default is 5. Must be between 1 and {}'.format(MAX_THREADS),
)
parser.add_argument(
'--force',
action = 'store_true',
default = False,
help = 'Always re-download existing albums, even if they already exist.',
)
parser.add_argument(
'--wait-after-download',
type = float,
default = 1,
help = 'How long, in seconds, to wait after successfully completing a download before downloading the next file. Defaults to \'1\'.',
)
parser.add_argument(
'--max-download-attempts',
type = int,
default = 5,
help = 'How many times to try downloading any individual files before giving up on it. Defaults to \'5\'.',
)
parser.add_argument(
'--retry-wait',
type = float,
default = 5,
help = 'How long, in seconds, to wait before trying to download a file again after a failure. Defaults to \'5\'.',
)
parser.add_argument(
'--include-hidden',
action='store_true',
default=False,
help = 'Download items in your collection that have been marked as hidden.',
)
parser.add_argument(
'--download-since',
default = '',
help = 'Only download items purchased on or after the given date. YYYY-MM-DD format, defaults to all items.'
)
parser.add_argument(
'--extract', '-x',
action='store_true',
help='Extracts downloaded albums, organised in {ARTIST}/{ALBUM} subdirectories. Songs are extracted to the '
'path specified in the `--directory`/`-d` flag, otherwise to the current directory if not specified. '
'Upon completion, original .zip file is deleted.'
)
parser.add_argument(
'--dry-run',
action = 'store_true',
default = False,
help = 'Don\'t actually download files, just process all the web data and report what would have been done.',
)
parser.add_argument('--verbose', '-v', action='count', default = 0)
args = parser.parse_args()
if args.parallel_downloads < 1 or args.parallel_downloads > MAX_THREADS:
parser.error('--parallel-downloads must be between 1 and 32.')
CONFIG['COOKIES'] = args.cookies
CONFIG['VERBOSE'] = args.verbose
CONFIG['OUTPUT_DIR'] = os.path.normcase(args.directory)
CONFIG['FILENAME_FORMAT'] = args.filename_format
CONFIG['BROWSER'] = args.browser
if args.download_since:
CONFIG['SINCE'] = datetime.datetime.strptime(args.download_since, '%Y-%m-%d')
CONFIG['FORMAT'] = args.format
CONFIG['FORCE'] = args.force
CONFIG['DRY_RUN'] = args.dry_run
CONFIG['EXTRACT'] = args.extract
if args.wait_after_download < 0:
parser.error('--wait-after-download must be at least 0.')
if args.max_download_attempts < 1:
parser.error('--max-download-attempts must be at least 1.')
if args.retry_wait < 0:
parser.error('--retry-wait must be at least 0.')
CONFIG['POST_DOWNLOAD_WAIT'] = args.wait_after_download
CONFIG['MAX_URL_ATTEMPTS'] = args.max_download_attempts
CONFIG['URL_RETRY_WAIT'] = args.retry_wait
if CONFIG['VERBOSE']: print(args)
if CONFIG['FORCE']: print('WARNING: --force flag set, existing files will be overwritten.')
CONFIG['COOKIE_JAR'] = get_cookies()
items = get_items_for_user(args.username, args.include_hidden)
if not items:
print('WARN: No album links found for user [{}]. Are you logged in and have you selected the correct browser to pull cookies from?'.format(args.username))
sys.exit(2)
if CONFIG['VERBOSE']: print('Found [{}] downloadable items in [{}]\'s collection.'.format(len(items), args.username))
if CONFIG['SINCE']:
# Filter items by purchase time
items = {key: items[key] for key in items
if purchase_time_ok(items[key], CONFIG['SINCE'])}
if not items:
print('No album links purchased since [{}].'.format(CONFIG['SINCE']))
sys.exit(0)
if CONFIG['VERBOSE']:
print('[{}] album links purchased since [{}].'.format(len(items), CONFIG['SINCE']))
print('Starting album downloads...')
CONFIG['TQDM'] = tqdm(items, unit = 'album')
if args.parallel_downloads > 1:
with ThreadPoolExecutor(max_workers = args.parallel_downloads) as executor:
for item in items.values(): executor.submit(download_and_log_album, item)
else:
for album in items.values():
download_and_log_album(album)
CONFIG['TQDM'].close()
downloaded_zips = [item['file_path'] + '.zip' for item in items.values() if item['extension'] == '.zip' and item['downloaded']]
print(downloaded_zips)
if args.extract:
for zip in downloaded_zips:
print(f'Extracting compressed archive: {zip}')
if CONFIG['DRY_RUN']: continue
album_name = re.search(r'\- (.+?)\.zip$', zip).group(1)
extract_dir = os.path.join(os.path.dirname(zip), album_name)
with zipfile.ZipFile(zip, 'r') as zip_file:
zip_file.extractall(extract_dir)
os.remove(zip)
print('Done.')
return 0
# Fetch item data for the given user via the bandcamp API, then return the
# 'items' subobject, with 'redownload_url' and 'filename' fields added to each.
def fetch_items(_hidden : bool, _user_id : str, _last_token : str, _count : int) -> dict:
url = COLLECTION_POST_URL if not _hidden else HIDDEN_POST_URL
if _count <= 0: return {}
payload = {
'fan_id' : _user_id,
'count' : _count,
'older_than_token' : _last_token,
}
response = requests.post(
url,
data = json.dumps(payload),
cookies = CONFIG['COOKIE_JAR'],
impersonate='chrome'
)
response.raise_for_status()
data = json.loads(response.text)
return merge_items_and_urls(data['items'], data['redownload_urls'] or {})
# Loads the given url and looks for the 'pagedata' div and, if found,
# returns its 'data-blob' property decoded from json.
def pagedata_for_url(_url : str) -> dict:
text = requests.get(_url, cookies = CONFIG['COOKIE_JAR']).text
soup = BeautifulSoup(
text,
'html.parser',
parse_only = SoupStrainer('div', id='pagedata'),
)
div = soup.find('div')
if not div: return {}
return json.loads(html.unescape(div.get('data-blob')))
# Returns a dictionary mapping item key to item. In addition to the basic
# bandcamp API item dict, returned items have their redownload url in
# field 'redownload_url' and their download path (excluding extension) in
# 'file_path'.
def get_items_for_user(_user : str, _include_hidden : bool) -> dict:
# Get the initial metadata from the json in the 'data-blob' div on the
# user landing page.
user_url = USER_URL.format(_user)
data = pagedata_for_url(user_url)
if not data:
print('ERROR: No data found at user url [{}]'.format(user_url))
exit(2)
if 'collection_count' not in data:
print('ERROR: No collection info for user {}.\nPlease double check that your given username is correct.\nIt should be given exactly as it appears at the end of your bandcamp user url.\nFor example: bandcamp.com/user_name'.format(
_user
))
exit(2)
user_id = data['fan_data']['fan_id']
items = data['item_cache']['collection']
if _include_hidden:
items.update(data['item_cache']['hidden'])
# Attach download urls to the first page of items.
items = merge_items_and_urls(items.values(), data['collection_data']['redownload_urls'])
# Fetch the rest of the visible library items.
items.update(fetch_items(
False,
user_id,
data['collection_data']['last_token'],
# count is the number we have left to fetch after the initial data blob
data['collection_data']['item_count'] - len(data['item_cache']['collection'])))
if _include_hidden:
# Fetch the rest of the hidden library items.
items.update(fetch_items(
True,
user_id,
data['hidden_data']['last_token'],
data['hidden_data']['item_count'] - len(data['item_cache']['hidden'])))
# Calculate filenames and handle collisions
add_item_file_paths(items)
return items
# Returns true if the item's purchase time is no earlier than the given
# cutoff, or if the item's purchase time can't be found.
def purchase_time_ok(_item : dict, _since : datetime.datetime) -> bool:
# If there's no purchased field we have to say yes since we can't
# reliably exclude this item.
if 'purchased' not in _item: return True
# If there is a purchased field, compare it to the given cutoff.
purchaseTime = datetime.datetime.strptime(_item['purchased'], '%d %b %Y %H:%M:%S GMT')
return purchaseTime >= _since
# Returns true if a valid item key can be assembled from the given item dict.
def item_has_key(_item) -> bool:
return 'sale_item_type' in _item and 'sale_item_id' in _item
# Returns the canonical key for an item (within the bandcamp API),
# its type followed by its item id.
def key_for_item(_item) -> str:
return str(_item['sale_item_type']) + str(_item['sale_item_id'])
# Merges the 'redownload_urls' dict into the 'items' dict from the bandcamp
# item list. The result is a dict keyed by key_for_item(item), where each
# item has the added key 'redownload_url'.
# Items with mismatched keys or no redownload url are dropped.
def merge_items_and_urls(_items : list, _urls : dict) -> dict:
results = {}
for item in _items:
if not item_has_key(item) or key_for_item(item) not in _urls:
CONFIG['TQDM'].write("WARN: couldn't find redownload URL for item_id:[{}], artist:[{}], title:[{}]".format(
item['item_id'], item['band_name'], item['item_title']))
continue
key = key_for_item(item)
new_item = dict(item)
new_item['redownload_url'] = _urls[key]
results[key] = new_item
return results
# Given a dictionary of bandcamp items, calculates their download path based
# on the configured format, appends the item key if two names collide, and
# stores the result in the item key 'file_path'.
def add_item_file_paths(_items : dict):
filenames = {} # maps item key to the computed filename
filename_counts = {} # number of occurrences of each computed filename
# First calculate the user-formatted name for everything
for key,item in _items.items():
# For backwards compatibility, use 'artist' and 'title' (the equivalent
# keys from the album landing page data blob) instead of 'band_name'
# and 'item_title' when parsing the filename format.
track_info = {
'item_id': item['item_id'],
'artist': sanitize_value(item['band_name']),
'title': sanitize_value(item['item_title']),
}
filename = CONFIG['FILENAME_FORMAT'].format(**track_info)
filenames[key] = filename
filename_counts[filename] = filename_counts.get(filename, 0) + 1
# Now rescan to apply final (deduped) paths.
for key,item in _items.items():
filename = filenames[key]
if filename_counts[filename] > 1:
# This filename is not unique, append the (globally unique) item
# key so their downloads don't overwrite each other.
filename = filename + '-' + key
item['file_path'] = os.path.join(CONFIG['OUTPUT_DIR'], filename)
# Check if a file already exists at the given path that matches the given
# metadata size string.
# _download_size, if nonempty, should be of the form "[num]MB" or "[num]GB"
def download_exists(_file_path : str, _download_size : str) -> bool:
if not os.path.exists(_file_path):
return False
if CONFIG['FORCE']:
if CONFIG['VERBOSE']: CONFIG['TQDM'].write('--force flag was given. Overwriting existing file at [{}].'.format(_file_path))
return False
if not _download_size:
# This is rare but can happen, a few downloads have no size
# metadata -- to be safe, don't report that we already have
# the file if we can't verify the size.
if CONFIG['VERBOSE'] >= 2: CONFIG['TQDM'].write('File at [{}] has no expected download size. Re-downloading.'.format(_file_path))
return False
actual_size = os.stat(_file_path).st_size
if _download_size.endswith("MB"):
actual_mb = actual_size / (1024 * 1024)
expected_mb = float(_download_size[:-2])
offset = abs(actual_mb - expected_mb)
elif _download_size.endswith("GB"):
# the field is called "size_mb" but it also uses GB
actual_gb = actual_size / (1024 * 1024 * 1024)
expected_gb = float(_download_size[:-2])
offset = abs(actual_gb - expected_gb)
else:
if CONFIG['VERBOSE'] >= 2: CONFIG['TQDM'].write('File at [{}] has unrecognized expected download size [{}]. Re-downloading.'.format(_file_path, _download_size))
return False
# We should expect a difference of <= 0.05 since bandcamp always gives
# sizes to one decimal place, but sometimes the reported value is slightly
# off. The main cause seems to be that archives are regenerated when
# needed and compression settings are not stable over time.
if offset < 0.15:
if CONFIG['VERBOSE'] >= 3: CONFIG['TQDM'].write('Skipping file that already exists: [{}]'.format(_file_path))
return True
if CONFIG['VERBOSE'] >= 2: CONFIG['TQDM'].write('File at [{}] is the wrong size. Expected [{}] but was [{}]. Re-downloading.'.format(_file_path, _download_size, actual_size))
return False
# Get the pagedata json for a url, applying retry / wait settings.
def pagedata_with_retry(_url : str) -> dict:
for attempt in range(CONFIG['MAX_URL_ATTEMPTS']):
# If this isn't the first attempt, apply the retry wait interval.
if attempt != 0: time.sleep(CONFIG['URL_RETRY_WAIT'])
try:
data = pagedata_for_url(_url)
if data: return data
if CONFIG['VERBOSE']: CONFIG['TQDM'].write('WARN: no pagedata found fetching album url [{}] (you may be rate-limited, try increasing --wait-after-download or --retry-wait)'.format(_url))
except IOError as e:
if CONFIG['VERBOSE'] >=2: CONFIG['TQDM'].write('WARN: I/O Error on attempt # [{}] to download the page data at [{}].'.format(attempt, _url))
except Exception as e:
print_exception(e, 'An exception occurred trying to download album url [{}]:'.format(_url))
CONFIG['TQDM'].write("ERROR: Couldn't download pagedata for album at url [{}]".format(_url))
return {}
# Download the file for the given bandcamp album, and notify TQDM when done.
def download_and_log_album(_album : dict):
try:
download_album(_album)
except Exception as e:
band_name = _album.get('band_name', '')
title = _album.get('item_title', '')
print_exception(e, 'Trying to download album [{}] with artist [{}] and title [{}]:'.format(
key_for_item(_album), band_name, title))
CONFIG['TQDM'].update()
time.sleep(CONFIG['POST_DOWNLOAD_WAIT'])
# Download the file for the given bandcamp album. Sets the key 'extension'
# to the file extension for this item, and the key 'downloaded' to whether
# a file was successfully downloaded.
def download_album(_album : dict):
_album['downloaded'] = False
if 'tralbum_type' in _album:
_album['extension'] = extension_from_type(_album['tralbum_type'], CONFIG['FORMAT'])
else:
# This key should never be missing, but if it is, we'll try to recover
# the extension later from the download itself.
_album['extension'] = ''
album_url = _album['redownload_url']
data = pagedata_with_retry(album_url)
if not data: return # pagedata_with_retry already logged failure
download_item = data['download_items'][0]
title = download_item['title']
if not 'downloads' in download_item:
CONFIG['TQDM'].write('WARN: Album [{}] at url [{}] has no downloads available.'.format(title, album_url))
return
if not CONFIG['FORMAT'] in download_item['downloads']:
CONFIG['TQDM'].write('WARN: Album [{}] at url [{}] does not have a download for format [{}].'.format(title, album_url, CONFIG['FORMAT']))
return
download = download_item['downloads'][CONFIG['FORMAT']]
download_url = download['url']
download_size = download.get('size_mb', None)
# If this is an unknown format, get the extension from the download url.
if not _album['extension']: _album['extension'] = extension_from_url(download_url)
file_path = _album['file_path'] + _album['extension']
# Only start the download if a matching file doesn't already exist.
if not download_exists(file_path, download_size):
_album['downloaded'] = download_file(download_url, _album)
def download_file(_url : str, _album : dict, _attempt : int = 1) -> bool:
"""Download the given url to the given file path.
Returns True if the url was successfully downloaded, False otherwise."""
if CONFIG['DRY_RUN']:
if CONFIG['VERBOSE'] >= 2:
CONFIG['TQDM'].write('Dry run: skipping file download for url [{}]'.format(_url))
return True
try:
response = requests.get(
_url,
cookies = CONFIG['COOKIE_JAR'],
impersonate='chrome',
stream = True,
)
response.raise_for_status()
expected_size = int(response.headers['content-length'])
filename_match = FILENAME_REGEX.search(response.headers['content-disposition'])
original_filename = urllib.parse.unquote(filename_match.group(1)) if filename_match else _url.split('/')[-1]
extension = os.path.splitext(original_filename)[1]
if extension != '' and extension != _album['extension']:
if CONFIG['VERBOSE']: CONFIG['TQDM'].write('WARN: expected extension [{}] but download at [{}] gives [{}].'.format(
_album['extension'], _url, extension))
_album['extension'] = extension
file_path = _album['file_path'] + _album['extension']
if os.path.exists(file_path) and not CONFIG['FORCE']:
# This should be quite rare since we already screened existing
# files against the album's size metadata, but check one last
# time if we already have a file of the right size.
actual_size = os.stat(file_path).st_size
if expected_size == actual_size:
if CONFIG['VERBOSE'] >= 2: CONFIG['TQDM'].write('Canceling download that matches existing file: [{}]'.format(file_path))
return False
if CONFIG['VERBOSE'] >= 2: CONFIG['TQDM'].write('Album being saved to [{}]'.format(file_path))
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'wb') as fh:
for chunk in response.iter_content():
fh.write(chunk)
actual_size = fh.tell()
if expected_size != actual_size:
raise IOError('Incomplete read. {} bytes read, {} bytes expected'.format(actual_size, expected_size))
return True
except IOError as e:
if _attempt < CONFIG['MAX_URL_ATTEMPTS']:
if CONFIG['VERBOSE'] >=2: CONFIG['TQDM'].write('WARN: I/O Error on attempt # [{}] to download the file at [{}]. Trying again...'.format(_attempt, _url))
time.sleep(CONFIG['URL_RETRY_WAIT'])
return download_file(_url, _album, _attempt + 1)
else:
print_exception(e, 'An exception occurred trying to download file url [{}]:'.format(_url))
except Exception as e:
print_exception(e, 'An exception occurred trying to download file url [{}]:'.format(_url))
return False
def print_exception(_e : Exception, _msg : str = '') -> None:
CONFIG['TQDM'].write('\nERROR: {}'.format(_msg))
CONFIG['TQDM'].write('\n'.join(traceback.format_exception(etype=type(_e), value=_e , tb=_e.__traceback__)))
CONFIG['TQDM'].write('\n')
# Windows has some picky requirements about file names
# So let's replace known bad characters with '-'
def sanitize_filename(_path : str) -> str:
if sys.platform.startswith('win'):
return re.sub(SANITIZE_PATH_WINDOWS_REGEX, '-', _path)
else:
# Remove `/`
return _path.replace('/', '-')
def sanitize_value(_value : any) -> any:
if type(_value) == str: return sanitize_filename(_value)
return _value
def extension_from_url(_url : str) -> str:
path = urllib.parse.urlparse(_url).path
filename = path.split('/')[-1]
if '.' not in filename: return ''
return path[path.rindex('.'):]
def extension_from_type(_download_type : str, _format : str) -> str:
if _download_type == "a": return ".zip"
if CONFIG['FORMAT'] in FORMAT_EXTENSIONS:
return FORMAT_EXTENSIONS[CONFIG['FORMAT']]
return ''
def get_cookies():
if CONFIG['COOKIES']:
# First try it as a mozilla cookie jar
try:
cj = http.cookiejar.MozillaCookieJar(CONFIG['COOKIES'])
cj.load()
return cj
except Exception as e:
if CONFIG['VERBOSE'] >=2: print(f"Cookie file at [{CONFIG['COOKIES']}] not a mozilla cookie jar.\nTrying it as a cookie store for the browser [{CONFIG['BROWSER']}]...")
# Next try it with browser_cookie
try:
func = getattr(browser_cookie3, CONFIG['BROWSER'])
return func(domain_name = 'bandcamp.com', cookie_file = CONFIG['COOKIES'])
except AttributeError:
raise Exception('Browser type [{}] is unknown. Can\'t pull cookies, so can\'t authenticate with bandcamp.'.format(CONFIG['BROWSER']))
try:
func = getattr(browser_cookie3, CONFIG['BROWSER'])
return func(domain_name = 'bandcamp.com')
except AttributeError:
raise Exception('Browser type [{}] is unknown. Can\'t pull cookies, so can\'t authenticate with bandcamp.'.format(CONFIG['BROWSER']))
def _is_zip(file_path: str) -> bool:
# Determine if the file is a compressed .zip archive
return file_path.endswith('.zip') if file_path else False
if __name__ == '__main__':
sys.exit(main())