Skip to content

Commit 79b1292

Browse files
authored
Union data type (#51)
* Added the `union` data type to the schema to allow a key to be more than one specific type * Reduced the constraints on the `install_requirements` in `setup.py` * Minor fixes and improvements to the overall codebase
1 parent 2bc3d58 commit 79b1292

File tree

18 files changed

+493
-19
lines changed

18 files changed

+493
-19
lines changed

changelog.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,9 @@
4141

4242
* Added support for Python 3.6+ by removing the `__future__` and `dataclass` imports from the codebase
4343
* Downgraded pylint in the developer dependencies to allow for Python 3.6 to run locally
44+
45+
## v0.3.0 (29th January 2023)
46+
47+
* Added the `union` data type to the schema to allow a key to be more than one specific type
48+
* Reduced the constraints on the `install_requires` dependencies in `setup.py`
49+
* Minor fixes and improvements to the overall codebase

docs/schema_components.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Below are the various components that can be used to construct a schema with Yam
1515
* [Regex Type](#regex-type)
1616
* [Ruleset Type](#ruleset-type)
1717
* [Enum Type](#enum-type)
18+
* [Union Type](#union-type)
1819

1920
## Schema
2021

@@ -362,7 +363,38 @@ ruleset <ruleset-name> {
362363
Or in a schema block:
363364

364365
```text
365-
Schema {
366+
schema {
366367
logLevel LogLevel
367368
}
368369
```
370+
371+
### Union Type
372+
373+
The `union` type can be used when a key could be one of several types. For example, a key could be either an `int` or a `float`. The union type supports all available types including lists, rulesets, regex, enums, maps and all the other basic types. When defining a `union`, 2 or more types must be defined for it to be valid.
374+
375+
Nested `unions` are *NOT* supported and will raise a parsing error. Unions can be nested in other data types such as lists, rulesets and maps, which can then be used within a `union`. However, direct nesting of unions is not supported.
376+
377+
__Note__: The union will always return a single violation if the data does not match any of the types in the union. It currently does not display the individual violations against each type.
378+
379+
An example of a `union` type:
380+
381+
```yaml
382+
items:
383+
- name: item1
384+
price: 10
385+
- name: item2
386+
price: 15.2
387+
```
388+
389+
Then the following schema can be defined:
390+
391+
```text
392+
ruleset Item {
393+
name str
394+
price union(int, float)
395+
}
396+
397+
schema {
398+
items list(Item)
399+
}
400+
```

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import setuptools
44

5-
VERSION = '0.2.2'
5+
VERSION = '0.3.0'
66
PACKAGE_NAME = 'yamlator'
77
DESCRIPTION = 'Yamlator is a CLI tool that allows a YAML file to be validated using a lightweight schema language' # nopep8
88

@@ -48,8 +48,8 @@ def create_long_description():
4848
'yamlator.cmd.outputs'
4949
]),
5050
install_requires=[
51-
'lark==1.0.0',
52-
'PyYAML==6.0'
51+
'lark>=1.0.0',
52+
'PyYAML>=5.4.1'
5353
],
5454
python_requires='>=3.6',
5555
classifiers=[
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
ruleset Hello {
2+
name str required true
3+
no_data
4+
}
5+
6+
schema {
7+
hello Hello
8+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
ruleset Config {
3+
kind str
4+
version union(str, int, union(int, str))
5+
}
6+
7+
schema {
8+
config Config optional
9+
}

tests/files/valid/valid.ys

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ enum Employee_department {
55
}
66

77
ruleset PersonAddress {
8-
houseNumber int
8+
houseNumber union(int, str)
99
street str
1010
city str
1111
post_code str

tests/parser/test_parse_schema.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919

2020
from yamlator.utils import load_schema
2121
from yamlator.parser import parse_schema
22-
from yamlator.utils import load_yaml_file
2322
from yamlator.parser import MalformedEnumNameError
2423
from yamlator.parser import MalformedRulesetNameError
2524
from yamlator.parser import MissingRulesError
25+
from yamlator.parser import SchemaParseError
2626
from yamlator.parser import SchemaSyntaxError
2727

2828

@@ -74,11 +74,16 @@ def test_parse_with_valid_content(self):
7474
(
7575
'with_ruleset_not_defined',
7676
'./tests/files/invalid_files/missing_defined_ruleset.ys',
77-
SchemaSyntaxError
77+
SchemaParseError
78+
),
79+
(
80+
'union_with_nested_union',
81+
'./tests/files/invalid_files/nested_union.ys',
82+
SchemaParseError
7883
),
7984
(
80-
'with_invalid_schema_syntax',
81-
'./tests/files/valid/valid.yaml',
85+
'with_invalid_rule_syntax',
86+
'./tests/files/invalid_files/invalid_syntax.ys',
8287
SchemaSyntaxError
8388
)
8489
])
@@ -87,9 +92,9 @@ def test_parse_syntax_errors(self, name: str, schema_file_path: str,
8792
# Unused by test case, however is required by the parameterized library
8893
del name
8994

90-
schema_content = load_yaml_file(schema_file_path)
95+
schema_content = load_schema(schema_file_path)
9196
with self.assertRaises(exception_type):
92-
parse_schema(str(schema_content))
97+
parse_schema(schema_content)
9398

9499

95100
if __name__ == '__main__':

tests/parser/test_schema_transformer.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
container type when the name of the given container is not found
3535
* `test_regex_type` tests transforming regex tokens into a Yamlator
3636
regex type
37+
* `test_union_type` tests transforming a union token into a Union
38+
rule type
39+
* `test_union_type_with_nested_union` tests that an exception
40+
is raised when a union contains a nested union
3741
* `test_type` tests the type transformer returns the token it was
3842
passed so the other transformer can successfully transform it
3943
* `test_schema_entry` tests that given a set of rule tokens within
@@ -57,6 +61,7 @@
5761

5862
from parameterized import parameterized
5963
from yamlator.exceptions import ConstructNotFoundError
64+
from yamlator.exceptions import NestedUnionError
6065

6166
from yamlator.parser import SchemaTransformer
6267
from yamlator.types import EnumItem, Rule, RuleType
@@ -231,6 +236,27 @@ def test_regex_type(self):
231236
self.assertEqual(expected_regex_str, rule_type.regex)
232237
self.assertEqual(rule_type.schema_type, SchemaTypes.REGEX)
233238

239+
def test_union_type(self):
240+
tokens = [
241+
RuleType(SchemaTypes.INT),
242+
RuleType(SchemaTypes.STR),
243+
RuleType(SchemaTypes.LIST, sub_type=RuleType(SchemaTypes.INT))
244+
]
245+
246+
union_type = self.transformer.union_type(tokens)
247+
self.assertIsNotNone(union_type)
248+
self.assertEqual(SchemaTypes.UNION, union_type.schema_type)
249+
250+
def test_union_type_with_nested_union(self):
251+
tokens = [
252+
RuleType(SchemaTypes.INT),
253+
RuleType(SchemaTypes.STR),
254+
RuleType(SchemaTypes.LIST, sub_type=RuleType(SchemaTypes.INT)),
255+
RuleType(SchemaTypes.UNION)
256+
]
257+
with self.assertRaises(NestedUnionError):
258+
self.transformer.union_type(tokens)
259+
234260
def test_type(self):
235261
type_token = self.transformer.type((self.name_token, ))
236262
self.assertEqual(self.name_token, type_token)

tests/test_types.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Test case for the Yamlator types module"""
2+
3+
import unittest
4+
5+
from parameterized import parameterized
6+
7+
from yamlator.types import RuleType
8+
from yamlator.types import UnionRuleType
9+
from yamlator.types import SchemaTypes
10+
11+
12+
class TestRuleType(unittest.TestCase):
13+
"""Test cases for the RuleType objects"""
14+
15+
@parameterized.expand([
16+
('str_type', RuleType(SchemaTypes.STR), 'str'),
17+
('int_type', RuleType(SchemaTypes.INT), 'int'),
18+
('float_type', RuleType(SchemaTypes.FLOAT), 'float'),
19+
('int_map', RuleType(
20+
SchemaTypes.MAP,
21+
sub_type=RuleType(SchemaTypes.INT)), 'map(int)'),
22+
('map_of_int_maps', RuleType(
23+
SchemaTypes.MAP,
24+
sub_type=RuleType(
25+
SchemaTypes.MAP,
26+
sub_type=RuleType(SchemaTypes.INT)
27+
)
28+
),
29+
'map(map(int))'),
30+
('str_list', RuleType(
31+
SchemaTypes.LIST,
32+
sub_type=RuleType(SchemaTypes.INT)), 'list(int)'),
33+
('list_of_str_lists', RuleType(
34+
SchemaTypes.LIST,
35+
sub_type=RuleType(
36+
SchemaTypes.LIST,
37+
sub_type=RuleType(SchemaTypes.STR))
38+
),
39+
'list(list(str))'),
40+
('enum_type', RuleType(
41+
SchemaTypes.ENUM, lookup='Numbers'),
42+
'Numbers'),
43+
('ruleset_type',
44+
RuleType(SchemaTypes.RULESET, lookup='Details'),
45+
'Details'),
46+
('any_type', RuleType(SchemaTypes.ANY), 'any'),
47+
('regex_type',
48+
RuleType(SchemaTypes.REGEX, regex='^role'),
49+
'Regex(^role)'),
50+
('bool_type', RuleType(SchemaTypes.BOOL), 'bool'),
51+
('union_type', UnionRuleType([
52+
RuleType(SchemaTypes.INT),
53+
RuleType(
54+
SchemaTypes.LIST,
55+
sub_type=RuleType(
56+
SchemaTypes.LIST,
57+
sub_type=RuleType(SchemaTypes.STR)
58+
)
59+
)
60+
]), 'union(int, list(list(str)))')
61+
])
62+
def test_rule_type_string_representation(self, name: str, rtype: RuleType,
63+
expected_string: str):
64+
del name
65+
66+
actual_string = str(rtype)
67+
self.assertEqual(expected_string, actual_string)
68+
69+
70+
if __name__ == '__main__':
71+
unittest.main()
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""Test cases for the Union Validator"""
2+
3+
import unittest
4+
import typing
5+
6+
from parameterized import parameterized
7+
from .base import BaseValidatorTest
8+
9+
from yamlator.types import Data
10+
from yamlator.types import RuleType
11+
from yamlator.types import UnionRuleType
12+
from yamlator.types import SchemaTypes
13+
from yamlator.violations import TypeViolation
14+
from yamlator.validators import UnionValidator
15+
from yamlator.validators.base_validator import Validator
16+
17+
18+
class DummyValidator(Validator):
19+
"""Dummy Validator to test the union validator
20+
21+
This validator will always return a Type violation
22+
23+
"""
24+
def validate(self, key: str, data: Data, parent: str, rtype: RuleType,
25+
is_required: bool = False) -> None:
26+
# Not used by the DummyValidator
27+
del data
28+
del is_required
29+
del rtype
30+
31+
message = 'Invalid type'
32+
violation = TypeViolation(key, parent, message)
33+
self._violations.append(violation)
34+
35+
36+
class TestUnionValidator(BaseValidatorTest):
37+
"""Test cases for the Union Validator"""
38+
39+
@parameterized.expand([
40+
('rule_not_union_type', RuleType(SchemaTypes.FLOAT), 1.23, 0),
41+
('union_rule_with_invalid_data', UnionRuleType([
42+
RuleType(SchemaTypes.LIST, sub_type=RuleType(SchemaTypes.INT)),
43+
RuleType(SchemaTypes.MAP, sub_type=RuleType(SchemaTypes.INT)),
44+
RuleType(SchemaTypes.RULESET, lookup='test'),
45+
RuleType(SchemaTypes.STR)
46+
]), 1.23, 1),
47+
('union_rule_with_valid_data', UnionRuleType([
48+
RuleType(SchemaTypes.LIST, sub_type=RuleType(SchemaTypes.INT)),
49+
RuleType(SchemaTypes.MAP, sub_type=RuleType(SchemaTypes.INT)),
50+
RuleType(SchemaTypes.RULESET, lookup='test'),
51+
RuleType(SchemaTypes.FLOAT)
52+
]), 1.23, 0),
53+
])
54+
def test_union_validation(self, name: str,
55+
rtype: typing.Union[UnionRuleType, RuleType],
56+
data: Data,
57+
expected_violation_count: int):
58+
# Unused by test case, however is required by the parameterized library
59+
del name
60+
61+
validator = UnionValidator(self.violations)
62+
self._set_sub_type_validators(validator)
63+
validator.validate(self.key, data, self.parent, rtype)
64+
65+
actual_violation_count = len(self.violations)
66+
self.assertEqual(expected_violation_count, actual_violation_count)
67+
68+
def _set_sub_type_validators(self, validator: UnionValidator):
69+
dummy_validator = DummyValidator(self.violations)
70+
71+
validator.set_enum_validator(dummy_validator)
72+
validator.set_list_validator(dummy_validator)
73+
validator.set_map_validator(dummy_validator)
74+
validator.set_regex_validator(dummy_validator)
75+
validator.set_ruleset_validator(dummy_validator)
76+
77+
@parameterized.expand([
78+
('rule_not_union_type', RuleType(SchemaTypes.FLOAT), 1.23, 0),
79+
('union_rule_with_invalid_data', UnionRuleType([
80+
RuleType(SchemaTypes.LIST, sub_type=RuleType(SchemaTypes.INT)),
81+
RuleType(SchemaTypes.MAP, sub_type=RuleType(SchemaTypes.INT)),
82+
RuleType(SchemaTypes.RULESET, lookup='test'),
83+
RuleType(SchemaTypes.STR)
84+
]), 1.23, 0),
85+
('union_rule_with_valid_data', UnionRuleType([
86+
RuleType(SchemaTypes.LIST, sub_type=RuleType(SchemaTypes.INT)),
87+
RuleType(SchemaTypes.MAP, sub_type=RuleType(SchemaTypes.INT)),
88+
RuleType(SchemaTypes.RULESET, lookup='test'),
89+
RuleType(SchemaTypes.FLOAT)
90+
]), 1.23, 0),
91+
])
92+
def test_union_validation_without_sub_validators(self, name: str,
93+
rtype: typing.Union[
94+
UnionRuleType,
95+
RuleType
96+
],
97+
data: Data,
98+
expected_violation_count: int): # nopep8 pylint: disable=C0301
99+
# Unused by test case, however is required by the parameterized library
100+
del name
101+
102+
validator = UnionValidator(self.violations)
103+
validator.validate(self.key, data, self.parent, rtype)
104+
105+
actual_violation_count = len(self.violations)
106+
self.assertEqual(expected_violation_count, actual_violation_count)
107+
108+
109+
if __name__ == '__main__':
110+
unittest.main()

0 commit comments

Comments
 (0)