-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdynamodb_scan_item_count.py
82 lines (62 loc) · 2.09 KB
/
dynamodb_scan_item_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import sys
import os
import boto3
import multiprocessing
import itertools
from time import sleep
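# Uncomment this section to read the table's stored item count instead of
# scanning, once AWS has refreshed it (DynamoDB only updates that figure
# periodically, so it can lag behind recent writes).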
# new_table = sys.argv[1]
# region = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
# iam_role = boto3.session.Session(profile_name='intern')
# dynamodb = iam_role.resource('dynamodb', region_name=region)
# table = dynamodb.Table(new_table)
# print table.item_count
def scan_table(src_table, client, segment, total_segments, queue):
    """Count the items in one segment of a parallel DynamoDB scan.

    Pages through the segment with the client's ``scan`` paginator and puts
    the segment's total on ``queue`` exactly once, after the last page has
    been read.

    Args:
        src_table: Name of the table to scan.
        client: Object exposing ``get_paginator('scan')`` (a boto3 DynamoDB
            client in this script).
        segment: Index of the segment this call is responsible for.
        total_segments: Number of segments the scan is divided into.
        queue: Object whose ``put`` receives the segment's item count.
    """
    paginator = client.get_paginator('scan')
    pages = paginator.paginate(
        TableName=src_table,
        # Only a tally is needed, so ask DynamoDB for per-page counts rather
        # than transferring every attribute of every item just to len() them.
        Select='COUNT',
        ReturnConsumedCapacity='NONE',
        ConsistentRead=True,
        Segment=segment,
        TotalSegments=total_segments,
        PaginationConfig={"PageSize": 500})

    # Each page reports how many items it matched in its 'Count' field.
    queue.put(sum(page['Count'] for page in pages))
# Script entry point: count the items of the table named on the command line
# by running a parallel scan, one worker process per segment.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('Usage: %s <source_table_name>' % sys.argv[0])
        sys.exit(1)

    table_1 = sys.argv[1]
    iam_role = boto3.session.Session(profile_name='intern')

    # Resolve the region explicitly and actually hand it to the client.
    # The environment variable wins, then whatever region the profile is
    # configured with; 'us-east-1' is only a last resort so that a setup with
    # no region configured anywhere still works.
    region = (os.getenv('AWS_DEFAULT_REGION')
              or iam_role.region_name
              or 'us-east-1')
    db_client = iam_role.client('dynamodb', region_name=region)

    queue = multiprocessing.Queue()
    pool_size = 4  # number of scan segments == number of worker processes
    pool = []
    spinner = itertools.cycle(['-', '/', '|', '\\'])

    # NOTE(review): the client object is handed to the child processes, which
    # relies on a fork-style start method; confirm before running on a
    # platform that spawns workers instead.
    for i in range(pool_size):
        worker = multiprocessing.Process(
            target=scan_table,
            kwargs={
                'src_table': table_1,
                'client': db_client,
                'segment': i,
                'total_segments': pool_size,
                'queue': queue
            }
        )
        pool.append(worker)
        worker.start()

    # Show a spinner until every worker has finished.
    for process in pool:
        while process.is_alive():
            sys.stdout.write(next(spinner))
            sys.stdout.flush()
            sleep(0.1)
            sys.stdout.write('\b')

    # A worker that died (e.g. on an AWS error) never puts its count on the
    # queue, so draining the queue unconditionally would block forever.
    failed = [process for process in pool if process.exitcode != 0]
    if failed:
        sys.stderr.write(
            '%d of %d scan workers failed; item count is incomplete.\n'
            % (len(failed), len(pool)))
        sys.exit(1)

    # Every worker succeeded, so exactly one count per worker is waiting.
    results = [queue.get() for _ in pool]

    print('*** %d items counted. Exiting... ***' % sum(results))