Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handy functions #5

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
12 changes: 4 additions & 8 deletions examples/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,9 @@ def quoted():
@generate
def array():
yield lbrack
first = yield value
rest = yield (comma >> value).many()
elements = yield value.sep_by(comma)
yield rbrack
rest.insert(0, first)
return rest
return elements

@generate
def object_pair():
Expand All @@ -59,11 +57,9 @@ def object_pair():
@generate
def json_object():
yield lbrace
first = yield object_pair
rest = yield (comma >> value).many()
members = yield object_pair.sep_by(comma)
yield rbrace
rest.insert(0, first)
return dict(rest)
return dict(members)

value = quoted | number | json_object | array | true | false | null

Expand Down
31 changes: 26 additions & 5 deletions src/parsy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def success(index, value): return Result(True, index, value, -1, None)
@staticmethod
def failure(index, expected): return Result(False, -1, None, index, expected)

# collect the furthest failure from self and other
def aggregate(self, other):
"""Collect the furthest failure from self and other."""
if not other: return self
if self.furthest >= other.furthest: return self

Expand Down Expand Up @@ -106,11 +106,11 @@ def result(self, res):
return self >> success(res)

def many(self):
return self.times(0, float('inf'))
return self.at_least(0)

def times(self, min, max=None):
# max=None means exactly min
# min=max=None means from 0 to infinity
# max can also be float('inf')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

float('inf') is available as math.inf in modern Python. At the least, this comment should be updated; I'm not sure about the code, though - do we keep compatibility with earlier Python 3.x versions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like math.inf was only added in 3.5, so I wouldn't break 3.3 and 3.4 compatibility for this. We could, however, pull float('inf') out into a module-level constant for a performance improvement.

if max is None:
max = min

Expand Down Expand Up @@ -139,7 +139,16 @@ def at_most(self, n):
return self.times(0, n)

def at_least(self, n):
return self.times(n) + self.many()
return self.times(n, float('inf'))

def sep_by(self, sep, *, min=0, max=float('inf')):
zero_times = success([])
if max == 0:
return zero_times
res = self.times(1) + (sep >> self).times(min - 1, max - 1)
if min == 0:
res |= zero_times
return res

def desc(self, description):
return self | fail(description)
Expand All @@ -154,6 +163,16 @@ def marked():

return marked

def should_fail(self):
@Parser
def fail_parser(stream, index):
res = self(stream, index)
if res.status:
return Result.failure(index, 'should fail')
return Result.success(index, res)

return fail_parser

def __add__(self, other):
return seq(self, other).map(lambda res: res[0] + res[1])

Expand Down Expand Up @@ -194,7 +213,7 @@ def alt_parser(stream, index):

return alt_parser

def seq(*parsers):
def seq(*parsers, f=None):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not up to me, but I'd prefer f be named seq_fn for consistency with map/bind.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the difference is that you almost certainly wouldn't pass arguments to map/bind by name (and most likely you wouldn't even know what they're called), whereas in this case it's keyword-only, so I wanted to use a shorter name. I do not insist though. Let's wait for @jneen to decide.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this extra keyword parameter would work better as a separate method, perhaps named combine. I can see situations where it would be useful outside of the context of seq, such as after many, or if you want to reuse a parser defined elsewhere, and it would be cleaner this way I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may not be following; how would it be different from .map()?

Also, my present self is not that convinced by the "compare before and after" example from my 2.5-year-younger self, so maybe we don't need this feature at all.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a PR for this feature here - python-parsy#3 - which happens to include docs that answer your question. Let me know what you think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. Yes, that looks good and is indeed cleaner. 👍

How would you use it with .many() though? (you don't know the arg count; you use .times() when you do know)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, times() would be a better example, although occasionally you might use many() followed by some constructor function that takes an arbitrary number of arguments via *args.

if not parsers:
return success([])

Expand All @@ -209,6 +228,8 @@ def seq_parser(stream, index):
index = result.index
values.append(result.value)

if f is not None:
values = f(*values)
return Result.success(index, values).aggregate(result)

return seq_parser
Expand Down
66 changes: 65 additions & 1 deletion test/test_parsy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from parsy import string, regex, generate, ParseError, letter, digit
from parsy import *
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This introduces tons of flake8 errors/warnings, and it can also give static analysers and "goto definition" a harder time for anyone working on the code. I don't think it is worth it.

import pdb
import unittest

Expand Down Expand Up @@ -165,5 +165,69 @@ def test_times_with_min_and_max_and_then(self):
self.assertRaises(ParseError, then_digit.parse, 'xyzwv1')
self.assertRaises(ParseError, then_digit.parse, 'x1')

def test_should_fail(self):
not_a_digit = digit.should_fail() >> regex(r'.*')

self.assertEqual(not_a_digit.parse('a'), 'a')
self.assertEqual(not_a_digit.parse('abc'), 'abc')
self.assertEqual(not_a_digit.parse('a10'), 'a10')
self.assertEqual(not_a_digit.parse(''), '')

self.assertRaises(ParseError, not_a_digit.parse, '8')
self.assertRaises(ParseError, not_a_digit.parse, '8ab')

def test_sep_by(self):
digit_list = digit.map(int).sep_by(string(','))

self.assertEqual(digit_list.parse('1,2,3,4'), [1, 2, 3, 4])
self.assertEqual(digit_list.parse('9,0,4,7'), [9, 0, 4, 7])
self.assertEqual(digit_list.parse('3,7'), [3, 7])
self.assertEqual(digit_list.parse('8'), [8])
self.assertEqual(digit_list.parse(''), [])

self.assertRaises(ParseError, digit_list.parse, '8,')
self.assertRaises(ParseError, digit_list.parse, ',9')
self.assertRaises(ParseError, digit_list.parse, '82')
self.assertRaises(ParseError, digit_list.parse, '7.6')

def test_sep_by_with_min_and_max(self):
digit_list = digit.map(int).sep_by(string(','), min=2, max=4)

self.assertEqual(digit_list.parse('1,2,3,4'), [1, 2, 3, 4])
self.assertEqual(digit_list.parse('9,0,4,7'), [9, 0, 4, 7])
self.assertEqual(digit_list.parse('3,7'), [3, 7])

self.assertRaises(ParseError, digit_list.parse, '8')
self.assertRaises(ParseError, digit_list.parse, '')
self.assertRaises(ParseError, digit_list.parse, '8,')
self.assertRaises(ParseError, digit_list.parse, ',9')
self.assertRaises(ParseError, digit_list.parse, '82')
self.assertRaises(ParseError, digit_list.parse, '7.6')

def test_seq(self):
int_ = digit.at_least(1).map(''.join).map(int)
addition = seq(int_, string('+'), int_).map(lambda l: l[0] + l[2])

self.assertEqual(addition.parse('2+2'), 4)
self.assertEqual(addition.parse('9+4'), 13)
self.assertEqual(addition.parse('20+19'), 39)

self.assertRaises(ParseError, addition.parse, '32')
self.assertRaises(ParseError, addition.parse, '3+')
self.assertRaises(ParseError, addition.parse, '5-67')

def test_seq_with_post_processing(self):
int_ = digit.at_least(1).map(''.join).map(int)
addition = seq(int_, string('+'), int_, f=lambda a, plus, b: a + b)

self.assertEqual(addition.parse('2+2'), 4)
self.assertEqual(addition.parse('9+4'), 13)
self.assertEqual(addition.parse('20+19'), 39)

self.assertRaises(ParseError, addition.parse, '32')
self.assertRaises(ParseError, addition.parse, '3+')
self.assertRaises(ParseError, addition.parse, '5-67')


if __name__ == '__main__':
unittest.main()