fix Any behaviour in logical operations, fix issues in json schema parser and python generator, add _update_spec to ParamsCollector, optimize time type transform
utilmeta · Nov 15, 2024 · 4512cc1 · 4512cc1
1 parent dc876c4
commit 4512cc1
Show file tree

Hide file tree

Showing 10 changed files with 136 additions and 11 deletions.
diff --git a/docs/zh/README.md b/docs/zh/README.md
@@ -332,3 +332,70 @@ utype 是一个 [UtilMeta](https://utilmeta.com) 项目，你可以加入下面
 * [X(Twitter)](https://twitter.com/utilmeta)
 * [Reddit](https://www.reddit.com/r/utilmeta)
 * [中文讨论区](https://lnzhou.com/channels/utilmeta/community)
+
+
+## 对比
+### utype | Pydantic
+Pydantic 是一个流行的 Python 数据解析验证库，utype 提供的功能与 Pydantic 大体上是相近的，但相比之下，utype 在以下方面有更多的关注：
+
+* **函数的解析**：utype 能很好地处理各种函数参数与返回值的解析（包括同步函数，异步函数，生成器与异步生成器函数），pydantic 对函数返回值只进行验证，并不尝试进行类型转化，且并不支持生成器函数
+* **约束类型**：对于 utype 来说所有的 **约束** （比如大小，长度，正则等）都会体现在类型中，从而可以直接用来进行类型转化与判断，pydantic 定义的类型往往需要作为字段的注解才能发挥作用
+```python
+>>> from pydantic import PositiveInt
+>>> PositiveInt(-1)
+-1
+>>> from utype.types import PositiveInt
+>>> PositiveInt(-1)
+utype.utils.exceptions.ConstraintError: Constraint: <gt>: 0 violated
+```
+* **类型注册机制**：utype 中所有类型的解析与转化方式都是可以进行注册与覆盖的，也就是说开发者可以方便地自定义基本类型的解析方式，或者注册自定义类型的解析函数；pydantic 支持解析的内置类型是固定的。由于 utype 的类型解析采用注册机制，所以 utype 也可以兼容解析 **pydantic**, **dataclasses**, **attrs** 等数据类 （参考 [兼容 Pydantic](/zh/guide/type/#pydantic)）
+```python
+from utype import register_transformer  
+from collections.abc import Mapping  
+from pydantic import BaseModel  
+
+@register_transformer(BaseModel)  
+def transform_pydantic(transformer, data, cls):  
+    if not transformer.no_explicit_cast and not isinstance(data, Mapping):  
+        data = transformer(data, dict)  
+    return cls(**data)
+```
+* **逻辑类型**：utype 的类型支持任意嵌套组合的逻辑运算，可以兼容基本类型与 typing 用法，以及支持运算出的类型对数据进行处理（pydantic 没有相应用法）
+```python
+from utype import Rule, exc
+from typing import Literal
+
+class IntWeekDay(int, Rule):  
+	gt = 0
+	le = 7
+
+weekday = IntWeekDay ^ Literal['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+>>> weekday('6')
+6
+>>> weekday(b'tue')
+'tue'
+>>> weekday(8)
+Constraint: <le>: 7 violated;
+Constraint: <enum>: ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun') violated
+```
+* **字段模式**：utype 的字段提供了 模式 (`mode`) 机制，包括 `no_input` 与 `no_output` 等，可以在一个数据类中定义字段的多种用法，对于在 web 场景中定义负责 **增改查** 等多种目的的数据模型更加方便
+* **原生字典模型**：pydantic 的 BaseModel 产出的数据实例虽然有 JSON 序列化方法，但并不能被 `json.dumps` 处理，utype 提供继承原生字典的 `Schema` 类，整合到数据工作流中更方便
+```python
+from pydantic import BaseModel
+from utype import Schema
+import json
+
+class md(BaseModel):
+	value: int
+
+class schema(Schema):
+	value: int
+
+>>> json.dumps(md(value=1))
+TypeError: Object of type md is not JSON serializable
+>>> json.dumps(schema(value=1))
+'{"value": 1}'
+```
+
+整体上而言，utype 提供的配置参数更加简洁一些，提供的功能更加灵活一些，可以看作一个更加灵活与轻量级的 Pydantic
diff --git a/tests/test_rule.py b/tests/test_rule.py
@@ -97,6 +97,13 @@ class IntWeekDay(int, Rule):
         multi_any = Rule.any_of(dict, list, str, None)
         assert multi_any('str') == 'str'
 
+        # test Any
+        assert types.PositiveInt & types.PositiveInt == types.PositiveInt
+        assert types.PositiveInt | types.Any in (Rule, types.Any)
+        assert types.PositiveInt | None | types.Any in (Rule, types.Any)
+        assert types.PositiveInt & types.Any == types.PositiveInt
+        assert types.PositiveInt ^ types.Any in (Rule, types.Any)
+
     def test_length(self):
         class Length3(Rule):
             length = 3

diff --git a/tests/test_spec.py b/tests/test_spec.py
@@ -0,0 +1,14 @@
+from utype.types import *
+from utype.parser.rule import Rule
+
+
+class TestSpec:
+    def test_json_schema_parser(self):
+        from utype.specs.json_schema.parser import JsonSchemaParser
+        from utype.specs.python.generator import PythonCodeGenerator
+        assert JsonSchemaParser({})() == Any
+        assert JsonSchemaParser({'anyOf': [{}, {'type': 'null'}]})() in (Rule, Any)
+        assert JsonSchemaParser({'type': 'object'})() == dict
+        assert JsonSchemaParser({'type': 'array'})() == list
+        assert JsonSchemaParser({'type': 'string'})() == str
+        assert JsonSchemaParser({'type': 'string', 'format': 'date'})() == date
diff --git a/tests/test_type.py b/tests/test_type.py
@@ -332,6 +332,11 @@ def trans_my(trans, d, t):
             ],
             time: [
                 ("11:12:13", time(11, 12, 13), True, True),
+                ("08:09:10", time(8, 9, 10), True, True),
+                ("08:09:10", time(8, 9, 10), True, True),
+                ("8:9:10", time(8, 9, 10), True, True),
+                ("8:9:1", time(8, 9, 1), True, True),
+                ("8:30", time(8, 30, 0), True, True),
                 (b"11:12:13", time(11, 12, 13), True, True),
                 (dt, dt.time(), True, False),
                 (dt.date(), time(), True, False),

diff --git a/utype/parser/rule.py b/utype/parser/rule.py
@@ -159,6 +159,9 @@ def _parse_arg(mcs, arg):
         if isinstance(arg, mcs):
             return arg
 
+        if arg in (Any, Self):
+            return arg
+
         __origin = get_origin(arg)
         if __origin:
             # like List[str] Literal["value"]
@@ -233,10 +236,24 @@ def combine(mcs, operator: str, *args):
 
             arg = mcs._parse_arg(arg)
 
+            if arg == Any:
+                if operator in ('|', '^'):
+                    # if Any in any_of, there will be just Any (or Rule)
+                    return Rule
+                elif operator == '&':
+                    # if Any in and, just ignore
+                    continue
+
             if arg in __args:
                 # avoid duplicate
                 continue
             __args.append(arg)
+        if not __args:
+            return Rule
+        if operator != '~':
+            # for operation other than not, if just 1 arg left, use that
+            if len(__args) == 1:
+                return __args[0]
 
         return mcs(
             OPERATOR_NAMES.get(operator, operator),

diff --git a/utype/specs/json_schema/constant.py b/utype/specs/json_schema/constant.py
@@ -15,12 +15,14 @@
 }
 TYPE_MAP = {
     'null': type(None),
+    'string': str,
     'boolean': bool,
     'bool': bool,
     'object': dict,
     'array': list,
     'integer': int,
     'int': int,
+    'bigint': int,
     'number': float,
     'float': float,
     'decimal': Decimal,

diff --git a/utype/specs/json_schema/parser.py b/utype/specs/json_schema/parser.py
@@ -18,7 +18,7 @@ class JsonSchemaParser:
     object_meta_cls = LogicalMeta
     object_options_cls = Options
     field_cls = Field
-    default_type = str
+    default_type = Any
 
     NON_NAME_REG = '[^A-Za-z0-9]+'
 
@@ -217,6 +217,8 @@ def parse_object(self,
                      description: str = None,
                      constraints: dict = None
                      ):
+        if list(schema) == ['type'] and not constraints:
+            return dict
         name = name or 'ObjectSchema'
         properties = schema.get('properties') or {}
         required = schema.get('required') or []
@@ -305,6 +307,8 @@ def parse_array(self,
                     description: str = None,
                     constraints: dict = None
                     ):
+        if list(schema) == ['type'] and not constraints:
+            return list
         items = schema.get('items')
         prefix_items = schema.get('prefixItems')
         args = []

diff --git a/utype/specs/python/generator.py b/utype/specs/python/generator.py
@@ -2,14 +2,12 @@
 import keyword
 import re
 
-import utype
 from utype.parser.rule import Rule, LogicalType
 from utype.parser.field import Field
 from utype.parser.cls import ClassParser
 from utype.parser.func import FunctionParser
 from utype import unprovided, Options
-
-from typing import Type, Dict, ForwardRef
+from typing import Type, Dict, Any, ForwardRef
 from utype.utils.functional import represent, valid_attr
 from collections import deque
 
@@ -98,10 +96,11 @@ def generate_for_function(self, f, force_forward_ref: bool = None) -> str:
                     default = self.generate_for_field(param_default)
                 else:
                     default = represent(param_default)
-                if len(args) == 1:
-                    args.append(f'={default}')
-                else:
-                    args.append(f' = {default}')
+                if default:
+                    if len(args) == 1:
+                        args.append(f'={default}')
+                    else:
+                        args.append(f' = {default}')
             params.append(''.join(args))
 
         return_annotation = None
@@ -122,7 +121,7 @@ def generate_for_type(self, t, with_constraints: bool = True, annotation: bool =
             return t
         if isinstance(t, ForwardRef):
             return repr(t.__forward_arg__)
-        if not isinstance(t, type):
+        if not isinstance(t, type) or t in (Any, Rule):
             return 'Any'
         if isinstance(t, LogicalType):
             if t.combinator:
@@ -183,7 +182,7 @@ def generate_for_rule(self, t: Type[Rule], with_constraints: bool = True, annota
 
     @classmethod
     def generate_for_field(cls, field: Field, addition: dict = None) -> str:
-        if not field.__spec_kwargs__ and not addition:
+        if not field.__spec_kwargs__ and not addition and field.__class__ == Field:
             return ''
         name = None
         if field.__class__ == Field:

diff --git a/utype/utils/base.py b/utype/utils/base.py
@@ -276,6 +276,12 @@ def __copy(cls, data, copy_class: bool = False):
             return data.__copy__()
         return data
 
+    def _update_spec(self, **kwargs):
+        # this is a rather ugly patch; we will figure out something more elegant in the future
+        spec = dict(self.__spec_kwargs__)
+        spec.update(kwargs)
+        self.__spec_kwargs__ = ImmutableDict(spec)
+
     def __deepcopy__(self, memo):
         return self.__copy__()
 

diff --git a/utype/utils/transform.py b/utype/utils/transform.py
@@ -612,7 +612,11 @@ def to_time(self, data, t: Type[time] = time) -> time:
                 return t()
         data = self._from_byte_like(data)
         if isinstance(data, str):
-            return t.fromisoformat(data)
+            if ':' in data:
+                try:
+                    return t.fromisoformat(data)
+                except ValueError:
+                    return self.to_datetime(f'1970-01-01 {data}').time()
         raise TypeError
 
     @registry.register(UUID)