diff --git a/.circleci/config.yml b/.circleci/config.yml index d67f59c0f..0877047dc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,7 +7,11 @@ jobs: - checkout - run: docker build -t protoc-gen-validate . - run: docker run --rm protoc-gen-validate ci - + - run: | + if [ "${CIRCLE_BRANCH}" == "main" ]; then + docker run --rm --env PYPI_REPO=pypi --env PGV_PYPI_TOKEN="${PGV_PYPI_TOKEN}" protoc-gen-validate python-release + fi + javabuild: machine: true working_directory: ~/.go_workspace/src/github.com/envoyproxy/protoc-gen-validate/java @@ -19,13 +23,13 @@ jobs: - checkout: path: ~/.go_workspace/src/github.com/envoyproxy/protoc-gen-validate - run: sudo rm -rf /usr/local/go && curl -O https://dl.google.com/go/go${CI_GO_VERSION}.linux-amd64.tar.gz && sudo tar -C /usr/local -xzf go${CI_GO_VERSION}.linux-amd64.tar.gz && rm go${CI_GO_VERSION}.linux-amd64.tar.gz && go version - - run: mvn -B verify + - run: mvn -B verify - add_ssh_keys: fingerprints: - "cd:a8:80:f3:5e:9a:37:30:ef:55:20:b5:1f:b9:e5:18" - deploy: - command: | # Deploy if on master branch. If the $RELEASE and $NEXT variables are set then prepare a full maven release. - if [ "${CIRCLE_BRANCH}" == "master" ]; then + command: | # Deploy if on main branch. If the $RELEASE and $NEXT variables are set then prepare a full maven release. + if [ "${CIRCLE_BRANCH}" == "main" ]; then echo $GPG_KEY | base64 --decode > signing-key gpg --passphrase $GPG_PASSPHRASE --import signing-key shred signing-key @@ -44,7 +48,3 @@ workflows: jobs: - build - javabuild - - - - diff --git a/.gitignore b/.gitignore index 468cfccfd..15fdf8475 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,17 @@ bazel-* - +**/.DS_Store !vendor/* /bin /protoc-gen-validate +/python/LICENSE +/python/validate.proto +/python/dist/ +*.egg-info/ +__pycache__/ +*.py[cod] + /tests/harness/cases/go /tests/harness/cases/gogo /tests/harness/cases/other_package/go diff --git a/Dockerfile b/Dockerfile index 385cc8cda..68dbdcf8b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,11 +12,17 @@ ENV INSTALL_DEPS \ wget \ maven \ patch \ - python + python3.8 \ + python3.8-distutils \ + python3-setuptools RUN apt-get update \ && apt-get install -y -q --no-install-recommends curl openjdk-8-jdk \ && echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list \ && curl https://bazel.build/bazel-release.pub.gpg | apt-key add - \ + # software-properties-common allows adding PPA (Personal Package Archive) repositories + && apt install -y -q --no-install-recommends software-properties-common \ + # deadsnakes is a PPA with newer releases of python than default Ubuntu repositories + && add-apt-repository ppa:deadsnakes/ppa \ && apt-get update \ && apt-get install -y -q --no-install-recommends ${INSTALL_DEPS} \ && apt-get clean \ @@ -62,6 +68,14 @@ RUN go get github.com/bazelbuild/buildtools/buildozer WORKDIR ${GOPATH}/src/github.com/envoyproxy/protoc-gen-validate COPY . . 
+# python must be on PATH for the execution of py_binary bazel targets, but +# the distribution we installed doesn't provide this alias +RUN ln -s /usr/bin/python3.8 /usr/bin/python + +# python tooling for linting and uploading to PyPI +RUN python3.8 -m easy_install pip \ + && python3.8 -m pip install -r requirements.txt + RUN make build ENTRYPOINT ["make"] diff --git a/Makefile b/Makefile index d064bbfe4..b460f647d 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,9 @@ lint: bin/golint bin/shadow # golint -set_exit_status # check for variable shadowing go vet -vettool=$(shell pwd)/bin/shadow ./... + # lints the python code for style enforcement + flake8 --config=python/setup.cfg python/protoc_gen_validate/validator.py + isort --check-only python/protoc_gen_validate/validator.py bin/shadow: GOBIN=$(shell pwd)/bin go install golang.org/x/tools/go/analysis/passes/shadow/cmd/shadow @@ -115,6 +118,23 @@ tests/harness/java/java-harness: # generates the Java-specific test harness mvn -q -f java/pom.xml clean package -DskipTests +.PHONY: prepare-python-release +prepare-python-release: + cp validate/validate.proto python/ + cp LICENSE python/ + +.PHONY: python-release +python-release: prepare-python-release + rm -rf python/dist + python3.8 -m build --no-isolation --sdist python + # the below command should be identical to `python3.8 -m build --wheel` + # however that returns mysterious `error: could not create 'build': File exists`. + # setuptools copies source and data files to a temporary build directory, + # but why there's a collision or why setuptools stopped respecting the `build_lib` flag is unclear. + # As a workaround, we build a source distribution and then separately build a wheel from it. + python3.8 -m pip wheel --wheel-dir python/dist --no-deps python/dist/* + python3.8 -m twine upload --verbose --skip-existing --repository ${PYPI_REPO} --username "__token__" --password ${PGV_PYPI_TOKEN} python/dist/* + .PHONY: ci ci: lint bazel testcases bazel-tests build_generation_tests @@ -130,5 +150,6 @@ clean: rm -rf \ tests/harness/cases/go \ tests/harness/cases/other_package/go - - + rm -rf \ + python/dist + python/*.egg-info diff --git a/README.md b/README.md index 5da6ad3a2..2cee19140 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ make build - `go` - `cc` for c++ (partially implemented) - `java` - - `python` +- Note: Python works via runtime code generation. There's no compile-time generation. See the Python section for details. ### Examples @@ -194,28 +194,25 @@ serverBuilder.addService(ServerInterceptors.intercept(svc, new ValidatingServerI #### Python -Since Python is a dynamically typed language, it works with JIT code generation. So protoc does not need to be run to generate code. - -The file `validate/validator.py` has a `validate()` method which needs to be run with an instance of the proto you are validating. - -You must install all the dependencies in the `requirements.txt` before running the validator. +The python implementation works via JIT code generation. In other words, the `validate(msg)` function is written +on-demand and [exec-ed](https://docs.python.org/3/library/functions.html#exec). An LRU-cache improves performance by +storing generated functions per descriptor. + +The python package is available on [PyPI](https://pypi.org/project/protoc-gen-validate). To run `validate()`, do the following: -``` -from validator import validate, FailedValidation - -p = Person() -validate(p) # This should either return None or raise a ValidationFailed exception. 
+```python +from entities_pb2 import Person +from protoc_gen_validate.validator import validate, ValidationFailed + +p = Person(first_name="Foo", last_name="Bar", age=42) +try: + validate(p) +except ValidationFailed as err: + print(err) ``` -To see what code has been generated and run, you can do the following: -``` -from validator import validate, print_validate, FailedValidation - -p = Person() -validate(p) -printer = print_validate(p) -``` +You can view what code has been generated by using the `print_validate()` function. ## Constraint Rules diff --git a/WORKSPACE b/WORKSPACE index 13a835dda..29e4f9d6f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -8,10 +8,6 @@ load("//bazel:dependency_imports.bzl", "pgv_dependency_imports") pgv_dependency_imports() -load("//bazel:pip_dependencies.bzl", "pgv_pip_dependencies") - -pgv_pip_dependencies() - load("//:dependencies.bzl", "go_third_party") # gazelle:repository_macro dependencies.bzl%go_third_party diff --git a/bazel/dependency_imports.bzl b/bazel/dependency_imports.bzl index bdcf51b9a..4eb5f2ee5 100644 --- a/bazel/dependency_imports.bzl +++ b/bazel/dependency_imports.bzl @@ -1,19 +1,15 @@ load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies") -load("@io_bazel_rules_python//python:pip.bzl", "pip_import", "pip_repositories") load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains") - -# Only needed for PIP support: +load("@rules_python//python:pip.bzl", "pip_install") def _pgv_pip_dependencies(): - pip_repositories() - - # This rule translates the specified requirements.txt into - # @pgv_pip_deps//:requirements.bzl, which itself exposes a pip_install method. - pip_import( + # This rule translates the specified requirements.in (which must be same as install_requires from setup.cfg) + # into @pgv_pip_deps//:requirements.bzl. + pip_install( name = "pgv_pip_deps", - requirements = "//:requirements.txt", + requirements = "//python:requirements.in", ) def _pgv_go_dependencies(): @@ -27,7 +23,7 @@ def pgv_dependency_imports(): # Import @com_google_protobuf's dependencies. protobuf_deps() - # Import @pgv_pip_deps defined by pip's requirements.txt. + # Import @pgv_pip_deps defined by python/requirements.in. _pgv_pip_dependencies() # Import rules for the Go compiler. diff --git a/bazel/pgv_proto_library.bzl b/bazel/pgv_proto_library.bzl index 324c36e08..67f9b556b 100644 --- a/bazel/pgv_proto_library.bzl +++ b/bazel/pgv_proto_library.bzl @@ -1,8 +1,7 @@ load("@io_bazel_rules_go//proto:compiler.bzl", "go_proto_compiler") load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") load("@rules_cc//cc:defs.bzl", "cc_library") -load("@rules_python//python:defs.bzl", "py_library") -load(":protobuf.bzl", "cc_proto_gen_validate", "java_proto_gen_validate", "python_proto_gen_validate") +load(":protobuf.bzl", "cc_proto_gen_validate", "java_proto_gen_validate") def pgv_go_proto_library(name, proto = None, deps = [], **kwargs): go_proto_compiler( @@ -63,30 +62,4 @@ def pgv_cc_proto_library( **kargs ) -def pgv_python_proto_library( - name, - deps = [], - python_deps = [], - **kwargs): - """Bazel rule to create a Python protobuf validation library from proto sources files. - Args: - name: the name of the pgv_python_proto_library - deps: proto_library rules that contain the necessary .proto files - python_deps: Python dependencies of the protos being compiled. 
Likely py_proto_library or pgv_python_proto_library. - **kwargs: other keyword arguments that are passed to the py_library. - """ - - python_proto_gen_validate( - name = name + "_validate", - deps = deps, - ) - py_library( - name = name, - srcs = [name + "_validate"], - deps = python_deps + [ - "@com_envoyproxy_protoc_gen_validate//validate:validate_py", - ], - **kwargs - ) - pgv_java_proto_library = java_proto_gen_validate diff --git a/bazel/pip_dependencies.bzl b/bazel/pip_dependencies.bzl deleted file mode 100644 index 3e877efa1..000000000 --- a/bazel/pip_dependencies.bzl +++ /dev/null @@ -1,4 +0,0 @@ -load("@pgv_pip_deps//:requirements.bzl", "pip_install") - -def pgv_pip_dependencies(): - pip_install() diff --git a/bazel/protobuf.bzl b/bazel/protobuf.bzl index b971ad232..c26e49815 100644 --- a/bazel/protobuf.bzl +++ b/bazel/protobuf.bzl @@ -108,40 +108,6 @@ def _protoc_gen_validate_cc_impl(ctx): package_command = "true", ) -def _protoc_python_output_files(ctx, proto_file_sources): - python_srcs = [] - - for p in proto_file_sources: - # The returned path needs to be relative to the package directory. - file_path = _package_relative_path(ctx, p) - if file_path.endswith(".proto"): - file_path = file_path[:-len(".proto")] - - python_srcs.append(file_path.replace("-", "_") + "_pb2.py") - return python_srcs - -def _protoc_gen_validate_python_impl(ctx): - """Generate Python protos using protoc-gen-validate plugin""" - protos = _proto_sources(ctx) - - python_files = _protoc_python_output_files(ctx, protos) - out_files = [ctx.actions.declare_file(out) for out in python_files] - - dir_out = _output_dir(ctx) - - args = [ - "--python_out=" + dir_out, - ] - - return _protoc_gen_validate_impl( - ctx = ctx, - lang = "python", - protos = protos, - out_files = out_files, - protoc_args = args, - package_command = "true", - ) - def _protoc_gen_validate_impl(ctx, lang, protos, out_files, protoc_args, package_command): protoc_args.append("--plugin=protoc-gen-validate=" + ctx.executable._plugin.path) @@ -317,26 +283,3 @@ java_proto_gen_validate = rule( }, implementation = _java_proto_gen_validate_impl, ) - -python_proto_gen_validate = rule( - attrs = { - "deps": attr.label_list( - mandatory = True, - providers = [ProtoInfo], - ), - "_protoc": attr.label( - cfg = "host", - default = Label("@com_google_protobuf//:protoc"), - executable = True, - allow_single_file = True, - ), - "_plugin": attr.label( - cfg = "host", - default = Label("@com_envoyproxy_protoc_gen_validate//:protoc-gen-validate"), - allow_files = True, - executable = True, - ), - }, - output_to_genfiles = True, - implementation = _protoc_gen_validate_python_impl, -) diff --git a/bazel/repositories.bzl b/bazel/repositories.bzl index feb532045..f59b1874b 100644 --- a/bazel/repositories.bzl +++ b/bazel/repositories.bzl @@ -126,12 +126,11 @@ def pgv_dependencies(): server_urls = MAVEN_SERVER_URLS, ) - if not native.existing_rule("io_bazel_rules_python"): - git_repository( - name = "io_bazel_rules_python", - remote = "https://github.com/bazelbuild/rules_python.git", - commit = "fdbb17a4118a1728d19e638a5291b4c4266ea5b8", - shallow_since = "1557865590 -0400", + if not native.existing_rule("rules_python"): + http_archive( + name = "rules_python", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", + sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", ) if not native.existing_rule("rules_proto"): diff --git a/python/BUILD b/python/BUILD new file mode 100644 index 
000000000..91286d031 --- /dev/null +++ b/python/BUILD @@ -0,0 +1,11 @@ +load("@rules_python//python:defs.bzl", "py_library") +load("@pgv_pip_deps//:requirements.bzl", "all_requirements") + +exports_files(["requirements.in", "setup.cfg"]) + +py_library( + name = "validator_py", + srcs = glob(["**/*.py"]), + deps = all_requirements, + visibility = ["//visibility:public"], +) diff --git a/python/README.md b/python/README.md new file mode 100644 index 000000000..017e68eca --- /dev/null +++ b/python/README.md @@ -0,0 +1,21 @@ +# Protoc-gen-validate (PGV) +While protocol buffers effectively guarantee the types of structured data, +they cannot enforce semantic rules for values. This package is a python implementation +of [protoc-gen-validate][pgv-home], which allows for runtime validation of various +semantic assertions expressed as annotations on the protobuf schema. The syntax for all available annotations is +in `validate.proto`. Implemented Python annotations are listed in the [rules comparison][rules-comparison]. + +### Example +```python3 +from entities_pb2 import Person +from protoc_gen_validate.validator import validate, ValidationFailed + +p = Person(first_name="Foo", last_name="Bar", age=42) +try: + validate(p) +except ValidationFailed as err: + print(err) +``` + +[pgv-home]: https://github.com/envoyproxy/protoc-gen-validate +[rules-comparison]: https://github.com/envoyproxy/protoc-gen-validate/blob/main/rule_comparison.md \ No newline at end of file diff --git a/python/protoc_gen_validate/__init__.py b/python/protoc_gen_validate/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/validate/validator.py b/python/protoc_gen_validate/validator.py similarity index 88% rename from validate/validator.py rename to python/protoc_gen_validate/validator.py index 225507b08..0c1205bea 100644 --- a/validate/validator.py +++ b/python/protoc_gen_validate/validator.py @@ -1,16 +1,15 @@ import re -from functools import lru_cache -from validate_email import validate_email -import ipaddress -try: - import urlparse -except ImportError: - import urllib.parse as urlparse -import uuid import struct -from jinja2 import Template -import time import sys +import time +import uuid +from functools import lru_cache +from ipaddress import IPv4Address, IPv6Address, ip_address +from urllib import parse as urlparse + +from google.protobuf.message import Message +from jinja2 import Template +from validate_email import validate_email printer = "" @@ -22,6 +21,11 @@ "HEADER_STRING": r'^[^\u0000\u000A\u000D]*$' } + +class ValidationFailed(Exception): + pass + + class ValidatingMessage(object): """Wrap a proto message to cache validate functions with the message class name. @@ -41,13 +45,14 @@ def __eq__(self, other): else: return False -def validate(proto_message): - return _validate_inner(ValidatingMessage(proto_message)) -# Cache generated functions to avoid the performance issue caused by repeated proto messages, -# which generate the same functions repeatedly. 
+def validate(proto_message: Message): + return _validate_inner(ValidatingMessage(proto_message))(proto_message) + + +# Cache generated functions with the message descriptor's full_name as the cache key @lru_cache() -def _validate_inner(proto_message): +def _validate_inner(proto_message: Message): func = file_template(proto_message) global printer printer += func + "\n" @@ -57,9 +62,11 @@ def _validate_inner(proto_message): except NameError: return locals()['generate_validate'] -def print_validate(proto_message): + +def print_validate(): return "".join([s for s in printer.splitlines(True) if s.strip()]) + def has_validate(field): if field.GetOptions() is None: return False @@ -68,12 +75,14 @@ def has_validate(field): return True return False + def byte_len(s): try: return len(s.encode('utf-8')) - except: + except: # noqa return len(s) + def _validateHostName(host): if not host: return False @@ -95,8 +104,10 @@ def _validateHostName(host): return False return True + def _validateEmail(addr): - if '<' in addr and '>' in addr: addr = addr.split("<")[1].split(">")[0] + if '<' in addr and '>' in addr: + addr = addr.split("<")[1].split(">")[0] if not validate_email(addr): return False @@ -109,16 +120,18 @@ def _validateEmail(addr): return False return _validateHostName(parts[1]) + def _has_field(message_pb, property_name): # NOTE: As of proto3, HasField() only works for message fields, not for # singular (non-message) fields. First try to use HasField and # if it fails (with a ValueError) we manually consult the fields. try: return message_pb.HasField(property_name) - except: + except: # noqa all_fields = set([field.name for field in message_pb.DESCRIPTOR.fields]) return property_name in all_fields + def const_template(option_value, name): const_tmpl = """{%- if str(o.string) and o.string.HasField('const') -%} if {{ name }} != \"{{ o.string['const'] }}\": @@ -139,7 +152,8 @@ def const_template(option_value, name): {% endif %} {%- endif -%} """ - return Template(const_tmpl).render(sys = sys, o = option_value, name = name, str = str) + return Template(const_tmpl).render(sys=sys, o=option_value, name=name, str=str) + def in_template(value, name): in_tmpl = """ @@ -152,17 +166,18 @@ def in_template(value, name): raise ValidationFailed(\"{{ name }} in {{ value['not_in'] }}\") {%- endif -%} """ - return Template(in_tmpl).render(value = value, name = name) + return Template(in_tmpl).render(value=value, name=name) + def string_template(option_value, name): if option_value.string.well_known_regex: - known_regex_type = option_value.string.DESCRIPTOR.fields_by_name['well_known_regex'].enum_type - regex_value = option_value.string.well_known_regex - regex_name = known_regex_type.values_by_number[regex_value].name - if regex_name in ["HTTP_HEADER_NAME", "HTTP_HEADER_VALUE"] and not option_value.string.strict: - option_value.string.pattern = regex_map["HEADER_STRING"] - else: - option_value.string.pattern = regex_map[regex_name] + known_regex_type = option_value.string.DESCRIPTOR.fields_by_name['well_known_regex'].enum_type + regex_value = option_value.string.well_known_regex + regex_name = known_regex_type.values_by_number[regex_value].name + if regex_name in ["HTTP_HEADER_NAME", "HTTP_HEADER_VALUE"] and not option_value.string.strict: + option_value.string.pattern = regex_map["HEADER_STRING"] + else: + option_value.string.pattern = regex_map[regex_name] str_templ = """ {%- set s = o.string -%} {% set i = 0 %} @@ -227,26 +242,26 @@ def string_template(option_value, name): {%- endif -%} {%- if s['address'] %} 
try: - ipaddress.ip_address({{ name }}) + ip_address({{ name }}) except ValueError: if not _validateHostName({{ name }}): raise ValidationFailed(\"{{ name }} is not a valid address\") {%- endif -%} {%- if s['ip'] %} try: - ipaddress.ip_address({{ name }}) + ip_address({{ name }}) except ValueError: raise ValidationFailed(\"{{ name }} is not a valid ip\") {%- endif -%} {%- if s['ipv4'] %} try: - ipaddress.IPv4Address({{ name }}) + IPv4Address({{ name }}) except ValueError: raise ValidationFailed(\"{{ name }} is not a valid ipv4\") {%- endif -%} {%- if s['ipv6'] %} try: - ipaddress.IPv6Address({{ name }}) + IPv6Address({{ name }}) except ValueError: raise ValidationFailed(\"{{ name }} is not a valid ipv6\") {%- endif %} @@ -268,7 +283,8 @@ def string_template(option_value, name): {%- endif -%} {% endfilter %} """ - return Template(str_templ).render(o = option_value, name = name, const_template = const_template, in_template = in_template) + return Template(str_templ).render(o=option_value, name=name, const_template=const_template, in_template=in_template) + def required_template(value, name): req_tmpl = """{%- if value['required'] -%} @@ -276,9 +292,10 @@ def required_template(value, name): raise ValidationFailed(\"{{ name }} is required.\") {%- endif -%} """ - return Template(req_tmpl).render(value = value, name = name) + return Template(req_tmpl).render(value=value, name=name) -def message_template(option_value, name, repeated = False): + +def message_template(option_value, name, repeated=False): message_tmpl = """{%- if m.message %} {{- required_template(m.message, name) }} {%- endif -%} @@ -290,18 +307,21 @@ def message_template(option_value, name, repeated = False): {% else %} if _has_field(p, \"{{ name.split('.')[-1] }}\"): {% endif %} - embedded = validate(p.{{ name }})(p.{{ name }}) + embedded = validate(p.{{ name }}) if embedded is not None: return embedded {%- endif -%} """ - return Template(message_tmpl).render(m = option_value, name = name, required_template = required_template, repeated = repeated) + return Template(message_tmpl).render( + m=option_value, name=name, required_template=required_template, repeated=repeated) + def bool_template(option_value, name): bool_tmpl = """ {{ const_template(o, name) -}} """ - return Template(bool_tmpl).render(o = option_value, name = name, const_template = const_template) + return Template(bool_tmpl).render(o=option_value, name=name, const_template=const_template) + def num_template(option_value, name, num): num_tmpl = """ @@ -371,7 +391,8 @@ def num_template(option_value, name, num): {%- endif -%} {% endfilter %} """ - return Template(num_tmpl).render(o = option_value, name = name, num = num, in_template = in_template, str = str) + return Template(num_tmpl).render(o=option_value, name=name, num=num, in_template=in_template, str=str) + def dur_arr(dur): value = 0 @@ -383,11 +404,13 @@ def dur_arr(dur): value = 0 return arr + def dur_lit(dur): value = dur.seconds + (10**-9 * dur.nanos) return value -def duration_template(option_value, name, repeated = False): + +def duration_template(option_value, name, repeated=False): dur_tmpl = """ {{- required_template(o.duration, name) }} {% if repeated %} @@ -471,9 +494,11 @@ def duration_template(option_value, name, repeated = False): raise ValidationFailed(\"{{ name }} is not greater than or equal to {{ dur_lit(dur['gte']) }}\") {%- endif -%} """ - return Template(dur_tmpl).render(o = option_value, name = name, required_template = required_template, dur_lit = dur_lit, dur_arr = dur_arr, repeated = 
repeated) + return Template(dur_tmpl).render(o=option_value, name=name, required_template=required_template, + dur_lit=dur_lit, dur_arr=dur_arr, repeated=repeated) + -def timestamp_template(option_value, name, repeated = False): +def timestamp_template(option_value, name, repeated=False): timestamp_tmpl = """ {{- required_template(o.timestamp, name) }} {% if repeated %} @@ -582,9 +607,11 @@ def timestamp_template(option_value, name, repeated = False): raise ValidationFailed(\"{{ name }} is not within range {{ dur_lit(ts['within']) }}\") {%- endif -%} """ - return Template(timestamp_tmpl).render(o = option_value, name = name, required_template = required_template, dur_lit = dur_lit, dur_arr = dur_arr, repeated = repeated) + return Template(timestamp_tmpl).render(o=option_value, name=name, required_template=required_template, + dur_lit=dur_lit, dur_arr=dur_arr, repeated=repeated) + -def wrapper_template(option_value, name, repeated = False): +def wrapper_template(option_value, name, repeated=False): wrapper_tmpl = """ {% if repeated %} if {{ name }}: @@ -623,11 +650,15 @@ def wrapper_template(option_value, name, repeated = False): raise ValidationFailed(\"{{ name }} is required.\") {%- endif %} """ - return Template(wrapper_tmpl).render(option_value = option_value, name = name, str = str, num_template = num_template, bool_template = bool_template, string_template = string_template, bytes_template = bytes_template, repeated = repeated) + return Template(wrapper_tmpl).render(option_value=option_value, name=name, str=str, num_template=num_template, + bool_template=bool_template, string_template=string_template, + bytes_template=bytes_template, repeated=repeated) + def enum_values(field): return [x.number for x in field.enum_type.values] + def enum_template(option_value, name, field): enum_tmpl = """ {{ const_template(option_value, name) -}} @@ -637,9 +668,11 @@ def enum_template(option_value, name, field): raise ValidationFailed(\"{{ name }} is not defined\") {% endif %} """ - return Template(enum_tmpl).render(option_value = option_value, name = name, const_template = const_template, in_template = in_template, field = field, enum_values = enum_values) + return Template(enum_tmpl).render(option_value=option_value, name=name, const_template=const_template, + in_template=in_template, field=field, enum_values=enum_values) + -def any_template(option_value, name, repeated = False): +def any_template(option_value, name, repeated=False): any_tmpl = """ {{- required_template(o, name) }} {%- if o['in'] %} @@ -661,7 +694,9 @@ def any_template(option_value, name, repeated = False): raise ValidationFailed(\"{{ name }} in {{ o['not_in'] }}\") {%- endif %} """ - return Template(any_tmpl).render(o = option_value.any, name = name, required_template = required_template, repeated = repeated) + return Template(any_tmpl).render( + o=option_value.any, name=name, required_template=required_template, repeated=repeated) + def bytes_template(option_value, name): bytes_tmpl = """ @@ -687,19 +722,19 @@ def bytes_template(option_value, name): {%- endif -%} {%- if b['ip'] %} try: - ipaddress.ip_address({{ name }}) + ip_address({{ name }}) except ValueError: raise ValidationFailed(\"{{ name }} is not a valid ip\") {%- endif -%} {%- if b['ipv4'] %} try: - ipaddress.IPv4Address({{ name }}) + IPv4Address({{ name }}) except ValueError: raise ValidationFailed(\"{{ name }} is not a valid ipv4\") {%- endif -%} {%- if b['ipv6'] %} try: - ipaddress.IPv6Address({{ name }}) + IPv6Address({{ name }}) except ValueError: raise 
ValidationFailed(\"{{ name }} is not a valid ipv6\") {%- endif -%} @@ -741,9 +776,11 @@ def bytes_template(option_value, name): {% endif %} {% endfilter %} """ - return Template(bytes_tmpl).render(sys=sys,o = option_value, name = name, const_template = const_template, in_template = in_template, b = option_value.bytes) + return Template(bytes_tmpl).render(sys=sys, o=option_value, name=name, + const_template=const_template, in_template=in_template, b=option_value.bytes) + -def switcher_template(accessor, name, field, map = False): +def switcher_template(accessor, name, field, map=False): switcher_tmpl = """ {%- if str(accessor.float) %} {{- num_template(accessor, name, accessor.float)|indent(4,True) -}} @@ -789,7 +826,12 @@ def switcher_template(accessor, name, field, map = False): {{- message_template(accessor, name, True)|indent(4,True) -}} {%- endif %} """ - return Template(switcher_tmpl).render(accessor = accessor, name = name, str = str, num_template = num_template, bool_template = bool_template, string_template = string_template, enum_template = enum_template, duration_template = duration_template, timestamp_template = timestamp_template, any_template = any_template, message_template = message_template, field = field, map = map) + return Template(switcher_tmpl).render(accessor=accessor, name=name, str=str, num_template=num_template, + bool_template=bool_template, string_template=string_template, + enum_template=enum_template, duration_template=duration_template, + timestamp_template=timestamp_template, any_template=any_template, + message_template=message_template, field=field, map=map) + def repeated_template(option_value, name, field): rep_tmpl = """ @@ -820,7 +862,7 @@ def repeated_template(option_value, name, field): {%- if o and o.repeated and o.repeated.items.message.skip %} pass {% else %} - validate(item)(item) + validate(item) {% endif %} {%- endif %} {%- if o and str(o.repeated['items']) %} @@ -831,11 +873,14 @@ def repeated_template(option_value, name, field): {%- endif %} {% endfilter %} """ - return Template(rep_tmpl).render(o = option_value, name = name, message_type = field.message_type, str = str, field = field, switcher_template = switcher_template) + return Template(rep_tmpl).render(o=option_value, name=name, message_type=field.message_type, + str=str, field=field, switcher_template=switcher_template) + def is_map(field): return field.label == 3 and field.message_type and len(field.message_type.fields) == 2 and \ - field.message_type.fields[0].name == "key" and field.message_type.fields[1].name == "value" + field.message_type.fields[0].name == "key" and field.message_type.fields[1].name == "value" + def map_template(option_value, name, field): map_tmpl = """ @@ -853,9 +898,6 @@ def map_template(option_value, name, field): if len({{ name }}) > {{ o.map['max_pairs'] }}: raise ValidationFailed(\"{{ name }} can contain at most {{ o.map['max_pairs'] }} items\") {%- endif %} - {%- if o and o.map['no_sparse'] -%} - raise UnimplementedException(\"no_sparse validation is not implemented because protobuf maps cannot be sparse in Python\") - {%- endif %} {%- if o and (str(o.map['keys']) or str(o.map['values']))%} for key in {{ name }}: {%- set keys = o.map['keys'] -%} @@ -892,19 +934,22 @@ def map_template(option_value, name, field): pass {%- elif field.message_type.fields[1].message_type %} for key in {{ name }}: - validate({{ name }}[key])({{ name }}[key]) + validate({{ name }}[key]) {%- endif %} {% endfilter %} """ - return Template(map_tmpl).render(o = option_value, name = 
name, message_type = field.message_type, str = str, field = field, switcher_template = switcher_template, num_template = num_template, string_template = string_template, bool_template = bool_template) + return Template(map_tmpl).render(o=option_value, name=name, message_type=field.message_type, str=str, + field=field, switcher_template=switcher_template, num_template=num_template, + string_template=string_template, bool_template=bool_template) + def rule_type(field): - name = "p."+ field.name + name = "p." + field.name if has_validate(field) and field.message_type is None: for option_descriptor, option_value in field.GetOptions().ListFields(): if option_descriptor.full_name == "validate.rules": if str(option_value.string): - return string_template(option_value, name ) + return string_template(option_value, name) elif str(option_value.message): return message_template(option_value, field.name) elif str(option_value.bool): @@ -974,6 +1019,7 @@ def rule_type(field): return message_template(None, field.name) return "" + def file_template(proto_message): file_tmp = """ # Validates {{ p.DESCRIPTOR.name }} @@ -1005,10 +1051,4 @@ def generate_validate(p): {%- endif %} {%- endfor %} return None""" - return Template(file_tmp).render(rule_type = rule_type, p = proto_message) - -class UnimplementedException(Exception): - pass - -class ValidationFailed(Exception): - pass + return Template(file_tmp).render(rule_type=rule_type, p=proto_message) diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 000000000..374b58cbf --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" diff --git a/python/requirements.in b/python/requirements.in new file mode 100644 index 000000000..96708aba2 --- /dev/null +++ b/python/requirements.in @@ -0,0 +1,7 @@ +# These must be the same as the requirements listed in +# install_requires section of setup.cfg. A test checks for this. +# See requirements_test.py for more context. +ipaddress>=1.0.22 +validate-email>=1.3 +Jinja2>=2.11.1 +protobuf>=3.6.1 diff --git a/python/setup.cfg b/python/setup.cfg new file mode 100644 index 000000000..7dd6e3668 --- /dev/null +++ b/python/setup.cfg @@ -0,0 +1,33 @@ +[metadata] +name = protoc-gen-validate +version = 0.4.1 +author = Envoyproxy +author_email = dev@lyft.com +description = PGV for python via just-in-time code generation +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/envoyproxy/protoc-gen-validate +license_files = LICENSE +classifiers = + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + +[options] +packages = find: +install_requires = + ipaddress>=1.0.22 + validate-email>=1.3 + Jinja2>=2.11.1 + protobuf>=3.6.1 +python_requires = >=3.6 + +[options.data_files] +data = validate.proto + +[flake8] +max-line-length = 120 +ignore = + F401 # unused imports diff --git a/requirements.txt b/requirements.txt index 67e6ac8cb..9fa5c7ef1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,8 @@ -ipaddress==1.0.22 -validate-email==1.3 -Jinja2==2.11.1 -MarkupSafe==1.1.1 +# Requirements for linting, building, and uploading the PGV python package to PyPI. +# The package's own requirements are in python/setup.cfg (and also in python/requirements.in). 
+flake8==3.8.4 +isort==5.7.0 +build==0.3.0 +twine==3.3.0 +wheel==0.36.2 +setuptools==53.0.0 diff --git a/tests/harness/BUILD b/tests/harness/BUILD index 18d008c8d..c5dfc5637 100644 --- a/tests/harness/BUILD +++ b/tests/harness/BUILD @@ -43,10 +43,8 @@ go_library( ) py_proto_library( - name = "python-harness-proto", + name = "harness_py_proto", srcs = ["harness.proto"], - default_runtime = "@com_google_protobuf//:protobuf_python", - protoc = "@com_google_protobuf//:protoc", visibility = ["//visibility:public"], deps = ["@com_google_protobuf//:protobuf_python"], ) diff --git a/tests/harness/cases/BUILD b/tests/harness/cases/BUILD index 9d93e5089..a71e8994b 100644 --- a/tests/harness/cases/BUILD +++ b/tests/harness/cases/BUILD @@ -1,11 +1,11 @@ load("@rules_java//java:defs.bzl", "java_proto_library") load("@rules_proto//proto:defs.bzl", "proto_library") +load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") load( "//bazel:pgv_proto_library.bzl", "pgv_cc_proto_library", "pgv_go_proto_library", "pgv_java_proto_library", - "pgv_python_proto_library", ) # gazelle:go_generate_proto false @@ -76,11 +76,31 @@ pgv_java_proto_library( deps = [":cases_proto"], ) -pgv_python_proto_library( - name = "python", - python_deps = [ - "//tests/harness/cases/other_package:python", +# There is not currently a canonical implementation of py_proto_library in Bazel. +# This py_proto_library implementation is from "github.com/protocolbuffers/protobuf" and works differently from other +# languages' canonical implementations - for example, it doesn't take "proto_library" targets as input. +py_proto_library( + name = "cases_py_proto", + srcs = [ + "bool.proto", + "bytes.proto", + "enums.proto", + "kitchen_sink.proto", + "maps.proto", + "messages.proto", + "numbers.proto", + "oneofs.proto", + "repeated.proto", + "strings.proto", + "wkt_any.proto", + "wkt_duration.proto", + "wkt_timestamp.proto", + "wkt_wrappers.proto", ], visibility = ["//visibility:public"], - deps = [":cases_proto"], + deps = [ + "//validate:validate_py", + "//tests/harness/cases/other_package:embed_python_proto", + "@com_google_protobuf//:protobuf_python", + ], ) diff --git a/tests/harness/cases/other_package/BUILD b/tests/harness/cases/other_package/BUILD index bd20d55b4..7b86aa2a4 100644 --- a/tests/harness/cases/other_package/BUILD +++ b/tests/harness/cases/other_package/BUILD @@ -1,5 +1,5 @@ load("@rules_java//java:defs.bzl", "java_proto_library") - +load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") load("@io_bazel_rules_go//go:def.bzl", "go_library") load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") load("@rules_proto//proto:defs.bzl", "proto_library") @@ -8,7 +8,6 @@ load( "pgv_cc_proto_library", "pgv_go_proto_library", "pgv_java_proto_library", - "pgv_python_proto_library", ) proto_library( @@ -70,8 +69,12 @@ pgv_java_proto_library( deps = [":embed_proto"], ) -pgv_python_proto_library( - name = "python", +py_proto_library( + name = "embed_python_proto", + srcs = ["embed.proto"], visibility = ["//visibility:public"], - deps = [":embed_proto"], + deps = [ + "//validate:validate_py", + "@com_google_protobuf//:protobuf_python", + ], ) diff --git a/tests/harness/python/BUILD b/tests/harness/python/BUILD index 0349efa8c..0babe43a4 100644 --- a/tests/harness/python/BUILD +++ b/tests/harness/python/BUILD @@ -1,4 +1,4 @@ -load("@io_bazel_rules_python//python:python.bzl", "py_binary") +load("@rules_python//python:defs.bzl", "py_binary", "py_test") load("@pgv_pip_deps//:requirements.bzl", "requirement") py_binary( @@ 
-6,15 +6,21 @@ py_binary( srcs = ["harness.py"], main = "harness.py", srcs_version = "PY3", - visibility = ["//tests/harness:__subpackages__"], + visibility = ["//visibility:public"], deps = [ - "//tests/harness:python-harness-proto", - "//tests/harness/cases:python", - "//validate:validator_py", - requirement("validate-email"), - requirement("ipaddress"), - requirement("Jinja2"), - requirement("MarkupSafe"), - ], - # Python Version is set by default to PY2AND3. + # ensures we test with the package's own protobuf runtime specified in setup.cfg + # and not the one riding on the py_proto_library dependencies + requirement("protobuf"), + "//tests/harness:harness_py_proto", + "//tests/harness/cases:cases_py_proto", + "//python:validator_py" + ] +) + +py_test( + name = "python-requirements-match", + srcs = ["requirements_test.py"], + main = "requirements_test.py", + srcs_version = "PY3", + data = ["//python:setup.cfg", "//python:requirements.in"], ) diff --git a/tests/harness/python/harness.py b/tests/harness/python/harness.py index 78c674e0b..9e3ade7fd 100644 --- a/tests/harness/python/harness.py +++ b/tests/harness/python/harness.py @@ -1,13 +1,13 @@ import sys import inspect -import os + +from python.protoc_gen_validate.validator import validate, ValidationFailed from tests.harness.harness_pb2 import TestCase, TestResult from tests.harness.cases.bool_pb2 import * from tests.harness.cases.bytes_pb2 import * from tests.harness.cases.enums_pb2 import * from tests.harness.cases.enums_pb2 import * -from tests.harness.cases.filename_with_dash_pb2 import * from tests.harness.cases.messages_pb2 import * from tests.harness.cases.numbers_pb2 import * from tests.harness.cases.oneofs_pb2 import * @@ -20,45 +20,30 @@ from tests.harness.cases.wkt_timestamp_pb2 import * from tests.harness.cases.kitchen_sink_pb2 import * -from validate.validator import validate, ValidationFailed, UnimplementedException, print_validate -class_list = [] +message_classes = {} for k, v in inspect.getmembers(sys.modules[__name__], inspect.isclass): if 'DESCRIPTOR' in dir(v): - class_list.append(v) + message_classes[v.DESCRIPTOR.full_name] = v -def unpack(message): - for cls in class_list: - if message.Is(cls.DESCRIPTOR): - test_class = cls() - message.Unpack(test_class) - return test_class if __name__ == "__main__": - if sys.version_info[0] >= 3: - message = sys.stdin.buffer.read() - else: - message = sys.stdin.read() + read = sys.stdin.buffer.read() + testcase = TestCase() - try: - testcase.ParseFromString(message) - except TypeError: - testcase.ParseFromString(message.encode(errors='surrogateescape')) - test_class = unpack(testcase.message) + testcase.ParseFromString(read) + + test_class = message_classes[testcase.message.TypeName()] + test_msg = test_class() + testcase.message.Unpack(test_msg) + try: result = TestResult() - valid = validate(test_class) - valid(test_class) + valid = validate(test_msg) result.Valid = True except ValidationFailed as e: result.Valid = False result.Reason = repr(e) - except UnimplementedException as e: - result.Error = False - result.AllowFailure = True - result.Reason = repr(e) + sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8') - try: - sys.stdout.write(result.SerializeToString().decode("utf-8")) - except TypeError: - sys.stdout.write(result.SerializeToString().decode("utf-8", errors='surrogateescape')) + sys.stdout.write(result.SerializeToString().decode("utf-8")) diff --git a/tests/harness/python/requirements_test.py b/tests/harness/python/requirements_test.py new file mode 
100755 index 000000000..2aac6fe04 --- /dev/null +++ b/tests/harness/python/requirements_test.py @@ -0,0 +1,24 @@ +import configparser + +# There are two sets of requirements relevant for python. The first set in requirements.txt is installed +# during the Docker build and is used for linting, building, and uploading the PGV python package to PyPI. +# +# The other set is in the install_requires section of setup.cfg. This is what's needed to use the package. +# +# We use pip_install from @rules_python to install these requirements in order to test the package. Unfortunately: +# - pip_install can't handle setup.cfg directly, it wants a file containing a simple list +# - as a bazel repository_rule, pip_install won't accept generated files as input so we can't autogen +# this simpler file out of setup.cfg as part of bazel build. +# +# So instead here we just check that requirements.in matches what's in install_requires of setup.cfg. + + +with open('python/requirements.in', 'r') as reqs: + lines = reqs.readlines() + requirements_dot_in_set = {line.strip() for line in lines if line.strip() and not line.startswith("#")} + +config = configparser.ConfigParser() +config.read('python/setup.cfg') +setup_dot_cfg_set = {line for line in config['options']['install_requires'].split() if not line.startswith("#")} + +assert requirements_dot_in_set == setup_dot_cfg_set diff --git a/validate/BUILD index 6816220d9..4e3837e20 100644 --- a/validate/BUILD +++ b/validate/BUILD @@ -27,8 -6 @@ cc_proto_library( py_proto_library( name = "validate_py", srcs = ["validate.proto"], - default_runtime = "@com_google_protobuf//:protobuf_python", - protoc = "@com_google_protobuf//:protoc", deps = ["@com_google_protobuf//:protobuf_python"], ) @@ -47,9 +45,3 @@ java_proto_library( name = "validate_java", deps = [":validate_proto"], ) - -py_library( - name = "validator_py", - srcs = ["validator.py"], - srcs_version = "PY3", -)
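A minimal usage sketch tying together the relocated package's `validate()` and the new zero-argument `print_validate()` from `python/protoc_gen_validate/validator.py`. It assumes the same hypothetical `entities_pb2.Person` message used in the README example above; that message is not part of this diff:

```python
# Sketch only: assumes a Person message generated from an entities.proto outside this diff.
from entities_pb2 import Person

from protoc_gen_validate.validator import ValidationFailed, print_validate, validate

p = Person(first_name="Foo", last_name="Bar", age=42)
try:
    validate(p)  # raises ValidationFailed if any validate.rules annotation is violated
except ValidationFailed as err:
    print(err)

# print_validate() no longer takes the message; it returns the source of every
# validate function generated (and cached) so far in this process.
print(print_validate())
```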