"""
import gc
import random
- import string
import sys

import numpy as np

from datetime import datetime as dt

-
- def random_strings(count, max_length):
-     result = []
-     for _ in range(count):
-         length = random.randrange(max_length + 1)
-         result.append("".join(random.choice(string.ascii_letters) for _ in range(length)))
-     return result
+ from arcticdb.util.test import random_ascii_strings


def generate_dataframe(columns, number_of_rows, strings, index_start="2000-1-1"):
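Note: this change drops the file-local random_strings helper in favour of random_ascii_strings imported from arcticdb.util.test. That shared helper's implementation is not part of this diff; a minimal sketch, assuming it mirrors the removed function (count strings, each of random length up to max_length, drawn from ASCII letters), would look like:

    import random
    import string

    def random_ascii_strings(count, max_length):
        # Assumed behaviour, mirroring the helper deleted above: each string has a
        # random length in [0, max_length] and is built from ASCII letters.
        return [
            "".join(random.choice(string.ascii_letters) for _ in range(random.randrange(max_length + 1)))
            for _ in range(count)
        ]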
@@ -50,7 +43,7 @@ def getsize(df):
def test_string_dedup_basic(lmdb_version_store_tiny_segment):
    lib = lmdb_version_store_tiny_segment
    symbol = "test_string_dedup_basic"
-     original_df = generate_dataframe(["col1", "col2", "col3", "col4"], 1000, random_strings(100, 10))
+     original_df = generate_dataframe(["col1", "col2", "col3", "col4"], 1000, random_ascii_strings(100, 10))
    lib.write(symbol, original_df, dynamic_strings=True)
    read_df_with_dedup = lib.read(symbol, optimise_string_memory=True).data
    read_df_without_dedup = lib.read(symbol, optimise_string_memory=False).data
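The hunk header above references a getsize(df) helper defined earlier in this file but not shown in the diff. A plausible sketch of such a helper, which counts each distinct string object only once so that the memory saving from deduplication is actually observable, might be (names and behaviour assumed, not taken from the source):

    import sys

    def getsize(df):
        # Hypothetical sketch: sum sys.getsizeof over distinct string objects only, so a
        # string shared between many cells (the effect of deduplication) is counted once.
        seen = set()
        total = 0
        for col in df.columns:
            for value in df[col]:
                if isinstance(value, str) and id(value) not in seen:
                    seen.add(id(value))
                    total += sys.getsizeof(value)
        return total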
@@ -63,7 +56,7 @@ def test_string_dedup_basic(lmdb_version_store_tiny_segment):
def test_string_dedup_dynamic_schema(lmdb_version_store_dynamic_schema):
    lib = lmdb_version_store_dynamic_schema
    symbol = "test_string_dedup_dynamic_schema"
-     unique_strings = random_strings(100, 10)
+     unique_strings = random_ascii_strings(100, 10)
    original_df = generate_dataframe(["col1"], 1000, unique_strings, "2000-1-1")
    # This will be different to original_df, as the value in each row is chosen at random from the unique string pool
    append_df = generate_dataframe(["col1"], 1000, unique_strings, "2010-1-1")
@@ -91,7 +84,7 @@ def test_string_dedup_nans(lmdb_version_store_tiny_segment):
    lib = lmdb_version_store_tiny_segment
    symbol = "test_string_dedup_nans"
    # Throw a nan into the unique string pool
-     unique_strings = random_strings(9, 10)
+     unique_strings = random_ascii_strings(9, 10)
    unique_strings.append(np.nan)
    columns = ["col1", "col2", "col3", "col4"]
    original_df = generate_dataframe(columns, 1000, unique_strings)
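Mixing np.nan into the string pool forces the columns to object dtype holding both str and float values. A quick illustration of why NaN needs special handling when comparing the round-tripped frames (NaN compares unequal to itself, while pandas' own comparison helpers treat matching NaNs as equal):

    import numpy as np
    import pandas as pd

    s = pd.Series(["abc", np.nan], dtype=object)
    assert s.iloc[1] != s.iloc[1]                # float NaN is not equal to itself
    assert s.isna().tolist() == [False, True]    # isna()/assert_frame_equal handle NaN consistently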
@@ -141,7 +134,7 @@ def test_string_dedup_performance(lmdb_version_store):
    for unique_string in unique_strings:
        for string_length in string_lengths:
-             string_pool = random_strings(unique_string, string_length)
+             string_pool = random_ascii_strings(unique_string, string_length)
            for rows in number_of_rows:
                print("Unique strings: {}".format(unique_string))
                print("String length: {}".format(string_length))