-
Notifications
You must be signed in to change notification settings - Fork 1
/
xcstorage.py
67 lines (61 loc) · 2.38 KB
/
xcstorage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from google.cloud import storage
from pathlib import Path
import argparse
import logging
import re, json, datetime, os, hashlib
def get_vehicle_folders(bucket_name='dumbdata'):
    """Return one entry per vehicle folder that contains ``.gz`` blobs.

    Lists the blobs of ``bucket_name`` whose names start with ``ingest`` and
    treats the second ``/``-separated segment of every ``.gz`` blob name as
    the vehicle id.

    Args:
        bucket_name: Name of the Cloud Storage bucket to scan.

    Returns:
        A list of dicts in first-seen order, one per distinct vehicle id,
        each with the keys ``'vehicle_id'`` and ``'path'``. ``'path'`` is the
        glob ``gs://<bucket_name>/ingest/<vehicle_id>/*.gz``.
    """
    client = storage.Client()
    logging.info(client)
    bucket = client.get_bucket(bucket_name)
    logging.info(bucket)

    folders = []
    # Track ids in a set so the duplicate check stays O(1) per blob instead
    # of rescanning the result list every time.
    seen_ids = set()
    for blob in bucket.list_blobs(prefix='ingest'):
        name = blob.name.strip()
        if Path(name).suffix != '.gz':
            continue
        segments = name.split('/')
        if len(segments) < 2:
            # A name such as 'ingest.gz' matches the prefix but has no
            # vehicle segment; skip it rather than fail on the index.
            logging.warning(
                'Skipping \'{}\': no vehicle folder in path.'.format(name))
            continue
        vehicle_id = segments[1]
        if vehicle_id in seen_ids:
            continue
        seen_ids.add(vehicle_id)
        logging.info('Added vehicle \'{}\'.'.format(vehicle_id))
        files_path = 'gs://' + bucket_name + '/ingest/' + vehicle_id + '/*.gz'
        folders.append({'vehicle_id': vehicle_id, 'path': files_path})

    logging.info('Found {} vehicle(s)...'.format(len(folders)))
    return folders
def move_xcfiles(file_list, bucket_name='dumbdata'):
    """Rename every listed blob so its first path segment becomes ``digested``.

    Args:
        file_list: Iterable of dicts; only the ``'rawpath'`` key (the blob's
            current name inside the bucket) is read.
        bucket_name: Name of the Cloud Storage bucket holding the blobs.

    Returns:
        None. Each blob is renamed through ``bucket.rename_blob``.
    """
    bucket = storage.Client().get_bucket(bucket_name)
    for entry in file_list:
        source_name = entry['rawpath']
        source_blob = bucket.blob(source_name)
        # Drop the leading segment and re-root the rest under 'digested'.
        tail_segments = source_name.split('/')[1:]
        target_name = '/'.join(['digested'] + tail_segments)
        message = "Moving blob from '{}' to '{}'".format(source_name, target_name)
        logging.info(message)
        bucket.rename_blob(source_blob, target_name)
def get_xc_files(bucket_name='dumbdata'):
    """Describe every ``.gz`` blob found under the ``ingest`` prefix.

    Args:
        bucket_name: Name of the Cloud Storage bucket to scan.

    Returns:
        A list of dicts, one per ``.gz`` blob, with the keys:

        * ``'vehicle_id'`` - second ``/``-separated segment of the blob name.
        * ``'file_path'`` - ``gs://<bucket_name>/<blob name>``.
        * ``'xcfile_id'`` - SHA-256 hex digest of the blob's file name (the
          last path segment), encoded as UTF-8.
        * ``'rawpath'`` - the blob name with surrounding whitespace stripped.
    """
    client = storage.Client()
    logging.info(client)
    bucket = client.get_bucket(bucket_name)
    logging.info(bucket)

    names = [str(blob.name).strip() for blob in bucket.list_blobs(prefix='ingest')]
    logging.info(names)
    logging.info('{} blobs found.'.format(len(names)))

    xc_files = []
    for name in names:
        if Path(name).suffix != '.gz':
            continue
        segments = name.split('/')
        if len(segments) < 2:
            # A name such as 'ingest.gz' matches the prefix but has no
            # vehicle segment; skip it rather than fail on the index.
            logging.warning(
                'Skipping \'{}\': no vehicle folder in path.'.format(name))
            continue
        logging.info('XC File: {}'.format(name))
        # hashlib digests bytes only; passing a str raises TypeError on
        # Python 3, so encode the file name first.
        file_id = hashlib.sha256(segments[-1].encode('utf-8')).hexdigest()
        xc_files.append({
            'vehicle_id': segments[1],
            'file_path': 'gs://' + bucket_name + '/' + name,
            'xcfile_id': file_id,
            'rawpath': name,
        })
    return xc_files