-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate_input.py
75 lines (62 loc) · 2.43 KB
/
generate_input.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
'''
RUNNING INSTRUCTIONS:
$ python generate_input.py <seed value> <unsorted file name> <number of values> <no of digits>
PARAMETER DESCRIPTION:
<seed value> : An integer that seeds the random number generator. For a fixed seed value, the sequence of random numbers generated will always be the same.
<unsorted file name> : This is the filename that you would like to give to the unsorted file that is being generated.
<number of values> : This denotes the number of values that need to be written into the generated file.
<no of digits> : This parameter sets the max number of digits in the values generated by Random Number Generator.
EXAMPLE:
$ python generate_input.py 1337 input.txt 100000000 10
This will generate a file named "input.txt" containing 100000000 numbers, each having at most 10 digits. The file will be around 1GB in size.
You can play around with the values to generate different files.
'''
from random import seed, randint
from math import ceil
import sys
import time
# Defaults used for any command-line argument that is missing or malformed.
n_values = 10_000_000
n_digits = 4
default_seed = 1337
# Number of values generated and written per chunk; bounds peak memory use.
BUFFER_SIZE = 100_000
out_fname = "unsorted_file.txt"


def _int_arg(argv, index, fallback, label):
    """Return ``int(argv[index])``, or *fallback* if it is absent or malformed.

    A missing argument falls back silently (every argument is optional).
    A non-integer argument also falls back, but a warning is printed to
    stderr so the substitution is not invisible to the user.
    """
    try:
        raw = argv[index]
    except IndexError:
        return fallback
    try:
        return int(raw)
    except ValueError:
        print(
            f"Ignoring invalid {label} {raw!r}; using default {fallback}",
            file=sys.stderr,
        )
        return fallback


def _write_random_values(path, count, digits, buffer_size):
    """Write *count* comma-separated random integers to *path*.

    Each value is drawn from ``1 .. 10**digits - 1``, so it has at most
    *digits* digits. Values are generated and written *buffer_size* at a
    time. The file has no trailing comma and no trailing newline; when
    *count* is zero or negative an empty file is created.

    Returns the number of values written.
    """
    # randint() includes BOTH endpoints, so the largest value that still has
    # `digits` digits is 10**digits - 1 (10**digits itself has one digit more).
    upper = 10 ** digits - 1
    written = 0
    # The context manager closes the file even if generation or writing fails.
    with open(path, "w", encoding="utf-8") as out:
        while written < count:
            chunk_len = min(buffer_size, count - written)
            chunk = [str(randint(1, upper)) for _ in range(chunk_len)]
            if written:
                # Separator between the previous chunk and this one.
                out.write(",")
            out.write(",".join(chunk))
            written += chunk_len
    return written


def main(argv=None):
    """Generate the unsorted input file described by the command line.

    *argv* has the same layout as ``sys.argv`` (program name first, then the
    optional ``<seed value> <unsorted file name> <number of values>
    <no of digits>``); it defaults to ``sys.argv``. Any argument that is
    missing uses the module-level default.

    Prints the number of values written and the elapsed time, and returns
    the number of values written. Exits with an error message if the number
    of digits is less than 1.
    """
    if argv is None:
        argv = sys.argv

    seed_value = _int_arg(argv, 1, default_seed, "<seed value>")
    path = argv[2] if len(argv) > 2 else out_fname
    count = _int_arg(argv, 3, n_values, "<number of values>")
    digits = _int_arg(argv, 4, n_digits, "<no of digits>")

    # Validate before opening the output so a bad value never truncates a file.
    if digits < 1:
        raise SystemExit("<no of digits> must be at least 1")

    seed(seed_value)
    start_time = time.time()
    written_count = _write_random_values(path, count, digits, BUFFER_SIZE)
    end_time = time.time()

    print("Number written : ", written_count)
    print("Time taken : ", end_time - start_time)
    return written_count


if __name__ == '__main__':
    main()