Skip to content

Commit 2f7dc39

Browse files
committed
Merge branch 'feature/api_floatfield' into develop
2 parents 6c9aa09 + a79d475 commit 2f7dc39

File tree

4 files changed

+73
-35
lines changed

4 files changed

+73
-35
lines changed

raspador/fields.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,16 @@ class BaseField(object):
6363
You can enter an integer number as the group index::
6464
6565
>>> s = "Contador de Reduções Z: 1246"
66-
>>> field = BaseField(r'Contador de Reduç(ão|ões) Z:\s*(\d+)', \
67-
groups=1, input_processor=int)
66+
>>> regex = r'Contador de Reduç(ão|ões) Z:\s*(\d+)'
67+
>>> field = BaseField(regex, groups=1, input_processor=int)
6868
>>> field.parse_block(s)
6969
1246
7070
7171
Or a list of integers::
7272
7373
>>> s = "Data do movimento: 02/01/2013 10:21:51"
74-
>>> c = BaseField(r'^Data .*(movimento|cupom): (\d+)/(\d+)/(\d+)',\
75-
groups=[1, 2, 3])
74+
>>> regex = r'^Data .*(movimento|cupom): (\d+)/(\d+)/(\d+)'
75+
>>> c = BaseField(regex, groups=[1, 2, 3])
7676
>>> c.parse_block(s)
7777
['02', '01', '2013']
7878
@@ -121,13 +121,13 @@ def __init__(self, search=None, default=None, is_list=False,
121121
if not hasattr(self.groups, '__iter__'):
122122
self.groups = (self.groups,)
123123

124-
self._setup()
124+
self.setup()
125125

126126
@property
127127
def _search_method(self):
128128
return self.search.findall
129129

130-
def _setup(self):
130+
def setup(self):
131131
"Hook for special setup required by child classes"
132132
pass
133133

@@ -189,20 +189,39 @@ def to_python(self, value):
189189

190190

191191
class FloatField(BaseField):
192-
"Removes thousand separator and converts to float."
193-
def to_python(self, value):
194-
value = value.replace(',', '')
195-
return float(value)
192+
"""
193+
Sanitizes captured value according to thousand and decimal separators and
194+
converts to float.
195+
"""
196+
default_thousand_separator = ','
197+
default_decimal_separator = '.'
196198

199+
def __init__(self, search, thousand_separator=None, decimal_separator=None,
200+
**kwargs):
201+
super(FloatField, self).__init__(search, **kwargs)
202+
self.thousand_separator = thousand_separator if thousand_separator \
203+
else self.default_thousand_separator
204+
self.decimal_separator = decimal_separator if decimal_separator \
205+
else self.default_decimal_separator
197206

198-
class BRFloatField(BaseField):
199-
"Removes thousand separator and converts to float (Brazilian format)"
200207
def to_python(self, value):
201-
value = value.replace('.', '')
202-
value = value.replace(',', '.')
208+
value = value.replace(self.thousand_separator, '')
209+
value = value.replace(self.decimal_separator, '.')
203210
return float(value)
204211

205212

213+
class BRFloatField(FloatField):
214+
"""
215+
Removes thousand separator and converts to float (Brazilian format).
216+
217+
.. deprecated:: 0.2.2
218+
219+
Use :py:class:`~raspador.fields.FloatField` instead.
220+
"""
221+
default_thousand_separator = '.'
222+
default_decimal_separator = ','
223+
224+
206225
class IntegerField(BaseField):
207226
def to_python(self, value):
208227
return int(value)
@@ -213,7 +232,7 @@ class BooleanField(BaseField):
213232
Returns true if the block is matched by the regex and at least some value
214233
is captured.
215234
"""
216-
def _setup(self):
235+
def setup(self):
217236
self.default = False
218237

219238
@property

raspador/parser.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from .item import Dictionary
1010

1111
logger = logging.getLogger(__name__)
12+
if hasattr(logging, 'NullHandler'):
13+
logger.addHandler(logging.NullHandler())
1214

1315

1416
class ParserMixin(object):
@@ -57,14 +59,16 @@ def parse_iterator(self, iterator):
5759
yield res
5860

5961
def parse_block(self, block):
60-
logger.debug('parse_block: %r:%s', type(block), block)
62+
logger.debug('%s.block: %r:%s', self.__class__.__name__, type(block),
63+
block)
6164
self.cache.append(block)
6265

6366
if self.has_search_begin and not self.begin_found:
6467
self.begin_found = bool(self._begin.match(block))
6568

6669
if self.begin_found:
67-
logger.debug('init found: %r', self.begin_found)
70+
logger.debug('%s.begin_found: %r', self.__class__.__name__,
71+
self.begin_found)
6872
if not self.has_item:
6973
self.item = self.default_item_class()
7074
if self.has_search_end:
@@ -108,6 +112,7 @@ def finalize_item(self):
108112
return res
109113

110114
def assign_value_into_item(self, name, value):
115+
logger.debug('%s.%s = %r', self.__class__.__name__, name, value)
111116
if isinstance(value, list) and not name in self.item:
112117
self.item[name] = value
113118
elif isinstance(value, list) and hasattr(self.item[name], 'extend'):

tests/test_fields.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,35 +9,35 @@
99

1010
class TestBaseField(unittest.TestCase):
1111

12-
def test_should_retornar_valor_no_analizar(self):
12+
def test_should_return_value_on_parse(self):
1313
s = "02/01/2013 10:21:51 COO:022734"
1414
field = BaseField(r'COO:(\d+)')
1515
value = field.parse_block(s)
1616
self.assertEqual(value, '022734')
1717

18-
def test_should_retornar_none_sem_search(self):
18+
def test_should_return_none_without_search(self):
1919
s = "02/01/2013 10:21:51 COO:022734"
2020
field = BaseField()
2121
value = field.parse_block(s)
2222
self.assertEqual(value, None)
2323

24-
def test_should_aceitar_callback(self):
24+
def test_should_accept_callback(self):
2525
s = "02/01/2013 10:21:51 COO:022734"
2626

27-
def dobro(value):
27+
def double(value):
2828
return int(value) * 2
2929

30-
field = BaseField(r'COO:(\d+)', input_processor=dobro)
30+
field = BaseField(r'COO:(\d+)', input_processor=double)
3131
value = field.parse_block(s)
3232
self.assertEqual(value, 45468) # 45468 = 2 x 22734
3333

34-
def test_should_recusar_callback_invalido(self):
34+
def test_should_refuse_invalid_callback(self):
3535
self.assertRaises(
3636
TypeError,
37-
lambda: BaseField(r'COO:(\d+)', input_processor='pegadinha')
37+
lambda: BaseField(r'COO:(\d+)', input_processor='not a callcabk')
3838
)
3939

40-
def test_should_utilizar_grupo_quando_informado(self):
40+
def test_should_return_groups(self):
4141
s = "Contador de Reduções Z: 1246"
4242
field = BaseField(r'Contador de Reduç(ão|ões) Z:\s*(\d+)', groups=1,
4343
input_processor=int)
@@ -46,43 +46,57 @@ def test_should_utilizar_grupo_quando_informado(self):
4646

4747

4848
class TestIntegerField(unittest.TestCase):
49-
def test_should_obter_valor(self):
49+
def test_should_return_value(self):
5050
s = "02/01/2013 10:21:51 COO:022734"
5151
field = IntegerField(r'COO:(\d+)')
5252
value = field.parse_block(s)
5353
self.assertEqual(value, 22734)
5454

5555

5656
class TestFloatField(unittest.TestCase):
57-
def test_should_obter_valor(self):
57+
def test_should_return_value(self):
5858
s = "VENDA BRUTA DIÁRIA: 793.00"
5959
field = FloatField(r'VENDA BRUTA DIÁRIA:\s+(\d+\.\d+)')
6060
value = field.parse_block(s)
6161
self.assertEqual(value, 793.0)
6262

63-
def test_should_obter_valor_com_separador_de_milhar(self):
63+
def test_should_return_value_with_thousand_separator(self):
6464
s = "VENDA BRUTA DIÁRIA: 10,036.70"
6565
field = FloatField(r'VENDA BRUTA DIÁRIA:\s+([\d,]+.\d+)')
6666
value = field.parse_block(s)
6767
self.assertEqual(value, 10036.7)
6868

69+
def test_should_return_value_with_custom_thousand_separator(self):
70+
s = "VENDA BRUTA: 10#036.70"
71+
field = FloatField(r'VENDA BRUTA:\s+([\d#]+.\d+)',
72+
thousand_separator='#')
73+
value = field.parse_block(s)
74+
self.assertEqual(value, 10036.7)
75+
76+
def test_should_return_value_with_custom_decimal_separator(self):
77+
s = "VENDA BRUTA: 10,036#70"
78+
field = FloatField(r'VENDA BRUTA:\s+([\d,]+#\d+)',
79+
decimal_separator='#')
80+
value = field.parse_block(s)
81+
self.assertEqual(value, 10036.7)
82+
6983

7084
class TestBRFloatField(unittest.TestCase):
71-
def test_should_obter_valor(self):
85+
def test_should_return_value(self):
7286
s = "VENDA BRUTA DIÁRIA: 793,00"
7387
field = BRFloatField(r'VENDA BRUTA DIÁRIA:\s+(\d+,\d+)')
7488
value = field.parse_block(s)
7589
self.assertEqual(value, 793.0)
7690

77-
def test_should_obter_valor_com_separador_de_milhar(self):
91+
def test_should_return_value_with_thousand_separator(self):
7892
s = "VENDA BRUTA DIÁRIA: 10.036,70"
7993
field = BRFloatField(r'VENDA BRUTA DIÁRIA:\s+([\d.]+,\d+)')
8094
value = field.parse_block(s)
8195
self.assertEqual(value, 10036.7)
8296

8397

8498
class TestStringField(unittest.TestCase):
85-
def test_should_obter_valor(self):
99+
def test_should_return_value(self):
86100
s = "1 Dinheiro 0,00"
87101
field = StringField(r'\d+\s+(\w[^\d]+)')
88102
value = field.parse_block(s)
@@ -106,7 +120,7 @@ def test_should_retornar_falso_ao_finalizar_quando_regex_nao_bate(self):
106120

107121

108122
class TestDateField(unittest.TestCase):
109-
def test_should_obter_valor(self):
123+
def test_should_return_value(self):
110124
s = "02/01/2013 10:21:51 COO:022734"
111125
field = DateField(r'^(\d+/\d+/\d+)')
112126
value = field.parse_block(s)
@@ -122,7 +136,7 @@ def test_should_obter_respeitando_format_string(self):
122136

123137

124138
class TestDateTimeField(unittest.TestCase):
125-
def test_should_obter_valor(self):
139+
def test_should_return_value(self):
126140
s = "02/01/2013 10:21:51 COO:022734"
127141
field = DateTimeField(r'^(\d+/\d+/\d+ \d+:\d+:\d+)')
128142
value = field.parse_block(s)

tests/test_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def diff(msg, fn):
6363

6464

6565
class CampoItem(BaseField):
66-
def _setup(self):
66+
def setup(self):
6767
self.search = (r"(\d+)\s(\d+)\s+([\w.#\s/()]+)\s+(\d+)(\w+)"
6868
"\s+X\s+(\d+,\d+)\s+(\w+)\s+(\d+,\d+)")
6969

@@ -92,7 +92,7 @@ class ExtratorDeDados(Parser):
9292

9393
class TotalizadoresNaoFiscais(Parser):
9494
class CampoNF(BaseField):
95-
def _setup(self):
95+
def setup(self):
9696
self.search = r'(\d+)\s+([\w\s]+)\s+(\d+)\s+(\d+,\d+)'
9797

9898
def to_python(self, v):

0 commit comments

Comments
 (0)