From dfaabce7bc623b6146986f149161e6b5f21a9831 Mon Sep 17 00:00:00 2001
From: Santi Manero <100587318+smaneroiriusrisk@users.noreply.github.com>
Date: Mon, 23 Oct 2023 11:08:30 +0200
Subject: [PATCH] [feature/OPT-1021] to dev (#330)
* [OPT-1021] Implemented DrawioValidator
* [OPT-1021] workaround DiagramMapper implementation
* [OPT-1021] fix merge conflicts
* [OPT-1021] fix otm representation issue
* [OPT-1021] Updated drawio xsd
---------
Co-authored-by: PacoCid <117292868+PacoCid@users.noreply.github.com>
---
setup.py | 1 +
sl_util/sl_util/secure_regex.py | 4 +
sl_util/tests/util/__init__.py | 0
.../tests/util/file_utils.py | 0
slp_base/slp_base/provider_type.py | 22 ++--
slp_base/slp_base/provider_validator.py | 5 +
.../resources/schemas/drawio_schema.xsd | 89 ++++++++++++++
slp_drawio/slp_drawio/drawio_processor.py | 3 +-
slp_drawio/slp_drawio/parse/diagram_mapper.py | 8 +-
slp_drawio/slp_drawio/parse/drawio_parser.py | 11 +-
.../slp_drawio/validate/drawio_validator.py | 40 +++++-
.../tests/resources/drawio/not_xml.drawio | 2 +
.../resources/drawio/wrong_mxcell.drawio | 23 ++++
.../resources/drawio/wrong_mxfile.drawio | 23 ++++
.../drawio/wrong_mxgraphmodel.drawio | 23 ++++
.../tests/resources/drawio/wrong_root.drawio | 23 ++++
.../tests/resources/test_resource_paths.py | 5 +
slp_drawio/tests/validate/__init__.py | 0
.../tests/validate/test_drawio_validator.py | 116 ++++++++++++++++++
.../tests/integration/test_visio_processor.py | 2 +-
20 files changed, 380 insertions(+), 20 deletions(-)
create mode 100644 sl_util/tests/util/__init__.py
rename slp_visio/tests/util/files.py => sl_util/tests/util/file_utils.py (100%)
create mode 100644 slp_drawio/resources/schemas/drawio_schema.xsd
create mode 100644 slp_drawio/tests/resources/drawio/not_xml.drawio
create mode 100644 slp_drawio/tests/resources/drawio/wrong_mxcell.drawio
create mode 100644 slp_drawio/tests/resources/drawio/wrong_mxfile.drawio
create mode 100644 slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio
create mode 100644 slp_drawio/tests/resources/drawio/wrong_root.drawio
create mode 100644 slp_drawio/tests/validate/__init__.py
create mode 100644 slp_drawio/tests/validate/test_drawio_validator.py
diff --git a/setup.py b/setup.py
index b3e498ab..41894e3e 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,7 @@
'networkx==3.1',
'dependency-injector==4.41.0',
'google-re2==1.0',
+ 'xmlschema==2.5.0',
# Do not upgrade pygraphviz unless security issues because it is heavily dependent on the underlying OS
'pygraphviz==1.10'
],
diff --git a/sl_util/sl_util/secure_regex.py b/sl_util/sl_util/secure_regex.py
index 64a7a307..77b0b229 100644
--- a/sl_util/sl_util/secure_regex.py
+++ b/sl_util/sl_util/secure_regex.py
@@ -19,3 +19,7 @@ def findall(regex, string, options=None):
def split(pattern, text, maxsplit=0, options=None):
return re2.split(pattern, text, maxsplit, options)
+
+
+def compile(pattern, options=None):  # NOTE: this name shadows the builtin compile() inside this module
+ return re2.compile(pattern, options)  # delegates to re2.compile, as split above delegates to re2.split
diff --git a/sl_util/tests/util/__init__.py b/sl_util/tests/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/slp_visio/tests/util/files.py b/sl_util/tests/util/file_utils.py
similarity index 100%
rename from slp_visio/tests/util/files.py
rename to sl_util/tests/util/file_utils.py
diff --git a/slp_base/slp_base/provider_type.py b/slp_base/slp_base/provider_type.py
index 8ae374d8..202f9c2a 100644
--- a/slp_base/slp_base/provider_type.py
+++ b/slp_base/slp_base/provider_type.py
@@ -1,25 +1,31 @@
from otm.otm.entity.representation import RepresentationType
from otm.otm.provider import Provider
+application_json = 'application/json'
+text_plain = 'text/plain'
+application_octet_stream = 'application/octet-stream'
+application_xml = 'application/xml'
+
class IacType(str, Provider):
CLOUDFORMATION = ("CLOUDFORMATION", "CloudFormation", RepresentationType.CODE,
- ['application/json', 'text/yaml', 'text/plain', 'application/octet-stream'])
+ [application_json, 'text/yaml', text_plain, application_octet_stream])
TERRAFORM = ("TERRAFORM", "Terraform", RepresentationType.CODE,
- ['text/plain', 'application/octet-stream', 'application/json'])
+ [text_plain, application_octet_stream, application_json])
TFPLAN = ("TFPLAN", "Terraform Plan", RepresentationType.CODE,
- ['text/plain', 'application/json', 'application/msword', 'text/vnd.graphviz', 'application/octet-stream'])
+ [text_plain, application_json, 'application/msword', 'text/vnd.graphviz', application_octet_stream])
class DiagramType(str, Provider):
VISIO = ("VISIO", "Visio", RepresentationType.DIAGRAM,
- ['application/vnd.ms-visio.drawing.main+xml', 'application/octet-stream'])
+ ['application/vnd.ms-visio.drawing.main+xml', application_octet_stream])
LUCID = ("LUCID", "Lucidchart", RepresentationType.DIAGRAM,
- ['application/vnd.ms-visio.drawing.main+xml', 'application/octet-stream', 'application/zip'])
- # DRAWIO = ("DRAWIO", "Drawio", RepresentationType.DIAGRAM,
- # ['application/octet-stream', 'application/xml', 'text/plain'])
+ ['application/vnd.ms-visio.drawing.main+xml', application_octet_stream, 'application/zip'])
+ DRAWIO = ("DRAWIO", "Drawio", RepresentationType.DIAGRAM,
+ [application_octet_stream, application_xml, text_plain])
class EtmType(str, Provider):
+
MTMT = ("MTMT", "Microsoft Threat Modeling Tool", RepresentationType.THREAT_MODEL,
- ['application/octet-stream', 'application/xml', 'text/plain'])
+ [application_octet_stream, application_xml, text_plain])
diff --git a/slp_base/slp_base/provider_validator.py b/slp_base/slp_base/provider_validator.py
index 504017cf..7b59908d 100644
--- a/slp_base/slp_base/provider_validator.py
+++ b/slp_base/slp_base/provider_validator.py
@@ -14,6 +14,11 @@ def generate_size_error(provider: Provider, source_file_name: str, exception=Sou
message=f'Provided {source_file_name} is not valid. Invalid size')
+def generate_schema_error(provider: Provider, source_file_name: str, exception=SourceFileNotValidError):
+ return exception(title=f'{provider.provider_name} file is not valid',
+ message=f'Provided {source_file_name} is not valid. It does not comply with schema')
+
+
class ProviderValidator(metaclass=abc.ABCMeta):
"""
Formal Interface to validate the provider source data
diff --git a/slp_drawio/resources/schemas/drawio_schema.xsd b/slp_drawio/resources/schemas/drawio_schema.xsd
new file mode 100644
index 00000000..7829f011
--- /dev/null
+++ b/slp_drawio/resources/schemas/drawio_schema.xsd
@@ -0,0 +1,89 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/slp_drawio/slp_drawio/drawio_processor.py b/slp_drawio/slp_drawio/drawio_processor.py
index ad3ee62e..d309108b 100644
--- a/slp_drawio/slp_drawio/drawio_processor.py
+++ b/slp_drawio/slp_drawio/drawio_processor.py
@@ -1,5 +1,4 @@
-from slp_base import OTMProcessor, ProviderValidator, ProviderLoader, MappingValidator, MappingLoader, ProviderParser, \
- DiagramType
+from slp_base import OTMProcessor, ProviderValidator, ProviderLoader, MappingValidator, MappingLoader, ProviderParser
from slp_drawio.slp_drawio.load.drawio_loader import DrawioLoader
from slp_drawio.slp_drawio.load.drawio_mapping_file_loader import DrawioMappingFileLoader
from slp_drawio.slp_drawio.parse.drawio_parser import DrawioParser
diff --git a/slp_drawio/slp_drawio/parse/diagram_mapper.py b/slp_drawio/slp_drawio/parse/diagram_mapper.py
index dce2f9ed..8a5a9ac5 100644
--- a/slp_drawio/slp_drawio/parse/diagram_mapper.py
+++ b/slp_drawio/slp_drawio/parse/diagram_mapper.py
@@ -1,11 +1,13 @@
from slp_drawio.slp_drawio.load.drawio_mapping_file_loader import DrawioMapping
-from slp_drawio.slp_drawio.objects.diagram_objects import Diagram
+from slp_drawio.slp_drawio.objects.diagram_objects import Diagram, DiagramRepresentation
class DiagramMapper:
- def __init__(self, diagram: Diagram, mapping: DrawioMapping):
+ def __init__(self, project_id: str, diagram: Diagram, mapping: DrawioMapping):
+ self.project_id = project_id
self.diagram = diagram
self.mapping = mapping
+ self.size = {'width': 1000, 'height': 1000}
def map(self):
- pass
+ self.diagram.representation = DiagramRepresentation(self.project_id, self.size)
diff --git a/slp_drawio/slp_drawio/parse/drawio_parser.py b/slp_drawio/slp_drawio/parse/drawio_parser.py
index 13751f4f..14f8d1e1 100644
--- a/slp_drawio/slp_drawio/parse/drawio_parser.py
+++ b/slp_drawio/slp_drawio/parse/drawio_parser.py
@@ -29,10 +29,15 @@ def build_otm(self) -> OTM:
return otm
def map_components_and_trustzones(self):
- DiagramMapper(self.diagram, self.mapping).map()
+ DiagramMapper(self.project_id, self.diagram, self.mapping).map()
def __build_otm(self):
- # TODO waiting the parser implementation
- pass
+ otm = OTMBuilder(self.project_id, self.project_name, DiagramType.DRAWIO).build()
+ otm.representations = [self.diagram.representation.otm]
+ otm.components = [c.otm for c in self.diagram.components]
+ otm.dataflows = [d.otm for d in self.diagram.dataflows]
+ otm.trustzones = [t.otm for t in self.diagram.trustzones]
+
+ return otm
diff --git a/slp_drawio/slp_drawio/validate/drawio_validator.py b/slp_drawio/slp_drawio/validate/drawio_validator.py
index 84941720..ddb0482d 100644
--- a/slp_drawio/slp_drawio/validate/drawio_validator.py
+++ b/slp_drawio/slp_drawio/validate/drawio_validator.py
@@ -1,23 +1,57 @@
+import copy
import logging
+import os
+import string
+import uuid
-from slp_base import ProviderValidator
+import xmlschema
+
+from slp_base import ProviderValidator, DiagramFileNotValidError, DiagramType
+from slp_base.slp_base.provider_validator import generate_size_error, generate_content_type_error, generate_schema_error
logger = logging.getLogger(__name__)
+MAX_SIZE = 10 * 1024 * 1024
+MIN_SIZE = 10
+
+path = os.path.dirname(__file__)
+
class DrawioValidator(ProviderValidator):
def __init__(self, data):
super(DrawioValidator, self).__init__()
self.data = data
+ self.provider = DiagramType.DRAWIO
+ self.xsd_schema = f'{path}/../../resources/schemas/drawio_schema.xsd'
def validate(self):
logger.info('Validating Drawio file')
self.__validate_size()
self.__validate_content_type()
+ self.__sanitize_name()
+ self.__validate_schema()
def __validate_size(self):
- pass
+ size = self.data.size
+ if size > MAX_SIZE or size < MIN_SIZE:
+ raise generate_size_error(self.provider, 'diag_file', DiagramFileNotValidError)
def __validate_content_type(self):
- pass
+ mime = self.data.content_type
+ if mime not in self.provider.valid_mime:
+ raise generate_content_type_error(self.provider, 'diag_file', DiagramFileNotValidError)
+
+ def __sanitize_name(self):  # replaces self.data.filename with '<uuid4>.<letters-only extension>'
+ ext = self.data.filename.split('.')[-1]  # NOTE(review): a name without '.' yields the whole name here
+ ext = "".join([c for c in ext if c in string.ascii_letters])  # keep ASCII letters only; may end up empty
+ name = str(uuid.uuid4())  # the original base name is discarded
+ self.data.filename = f'{name}.{ext}'
+
+ def __validate_schema(self):
+ # Raises DiagramFileNotValidError when validation fails; the original error is kept as its __cause__
+ schema = xmlschema.XMLSchema(self.xsd_schema)
+ try:
+ schema.validate(copy.deepcopy(self.data.file))  # validates a deep copy rather than self.data.file itself
+ except Exception as e:
+ raise generate_schema_error(self.provider, 'diag_file', DiagramFileNotValidError) from e
diff --git a/slp_drawio/tests/resources/drawio/not_xml.drawio b/slp_drawio/tests/resources/drawio/not_xml.drawio
new file mode 100644
index 00000000..393c4e9b
--- /dev/null
+++ b/slp_drawio/tests/resources/drawio/not_xml.drawio
@@ -0,0 +1,2 @@
+WRONG_CONTENT
+NOT XML
\ No newline at end of file
diff --git a/slp_drawio/tests/resources/drawio/wrong_mxcell.drawio b/slp_drawio/tests/resources/drawio/wrong_mxcell.drawio
new file mode 100644
index 00000000..f0b6fae0
--- /dev/null
+++ b/slp_drawio/tests/resources/drawio/wrong_mxcell.drawio
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/slp_drawio/tests/resources/drawio/wrong_mxfile.drawio b/slp_drawio/tests/resources/drawio/wrong_mxfile.drawio
new file mode 100644
index 00000000..e80bb1b2
--- /dev/null
+++ b/slp_drawio/tests/resources/drawio/wrong_mxfile.drawio
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio b/slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio
new file mode 100644
index 00000000..99d1c6a9
--- /dev/null
+++ b/slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/slp_drawio/tests/resources/drawio/wrong_root.drawio b/slp_drawio/tests/resources/drawio/wrong_root.drawio
new file mode 100644
index 00000000..11985a3b
--- /dev/null
+++ b/slp_drawio/tests/resources/drawio/wrong_root.drawio
@@ -0,0 +1,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/slp_drawio/tests/resources/test_resource_paths.py b/slp_drawio/tests/resources/test_resource_paths.py
index f12eccef..09724908 100644
--- a/slp_drawio/tests/resources/test_resource_paths.py
+++ b/slp_drawio/tests/resources/test_resource_paths.py
@@ -7,3 +7,8 @@
aws_minimal_xml = f'{drawio}/aws_minimal.drawio.xml'
aws_minimal_drawio = f'{drawio}/aws_minimal.drawio'
aws_minimal_drawio_as_json = f'{drawio}/aws_minimal_source.json'
+wrong_mxcell_drawio = f'{drawio}/wrong_mxcell.drawio'
+wrong_mxfile_drawio = f'{drawio}/wrong_mxfile.drawio'
+wrong_mxgraphmodel_drawio = f'{drawio}/wrong_mxgraphmodel.drawio'
+wrong_root_drawio = f'{drawio}/wrong_root.drawio'
+not_xml = f'{drawio}/not_xml.drawio'
diff --git a/slp_drawio/tests/validate/__init__.py b/slp_drawio/tests/validate/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/slp_drawio/tests/validate/test_drawio_validator.py b/slp_drawio/tests/validate/test_drawio_validator.py
new file mode 100644
index 00000000..b30e511b
--- /dev/null
+++ b/slp_drawio/tests/validate/test_drawio_validator.py
@@ -0,0 +1,116 @@
+from unittest.mock import Mock
+
+import pytest
+from starlette.datastructures import UploadFile, Headers
+
+from sl_util.sl_util import secure_regex as re
+from sl_util.tests.util.file_utils import get_upload_file
+
+from slp_base import DiagramFileNotValidError, CommonError
+from slp_drawio.slp_drawio.validate.drawio_validator import DrawioValidator
+from slp_drawio.tests.resources.test_resource_paths import wrong_mxgraphmodel_drawio, wrong_mxfile_drawio, \
+ wrong_mxcell_drawio, wrong_root_drawio, aws_minimal_drawio, aws_minimal_xml, not_xml
+
+
+filename_pattern = re.compile('^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}\\.(drawio|xml)$')  # group + end anchor: [drawio|xml] was a one-character class
+
+
+class TestDrawioValidator:
+
+ @pytest.mark.parametrize('size_value', [
+ pytest.param(9.999, id="less than expected min"),
+ pytest.param(10.001 * 1024 * 1024, id="more than expected max")
+ ])
+ def test_invalid_sizes(self, size_value):
+ # GIVEN the validator with an invalid size
+ validator = DrawioValidator(Mock(size=size_value))
+
+ # WHEN we validate
+ with pytest.raises(DiagramFileNotValidError) as error_info:
+ validator.validate()
+
+ # THEN the error raised is as expected
+ assert error_info.typename == 'DiagramFileNotValidError'
+ assert error_info.value.message == 'Provided diag_file is not valid. Invalid size'
+
+ @pytest.mark.parametrize('mime', [
+ 'application/zip', 'application/pdf', 'text/html'
+ ])
+ def test_invalid_mimetype(self, mime):
+ # GIVEN the validator with an invalid content type
+ validator = DrawioValidator(Mock(content_type=mime, size=10))
+
+ # WHEN we validate
+ with pytest.raises(DiagramFileNotValidError) as error_info:
+ validator.validate()
+
+ # THEN the error raised is as expected
+ assert error_info.typename == 'DiagramFileNotValidError'
+ assert error_info.value.message == 'Invalid content type for diag_file'
+
+ @pytest.mark.parametrize('filepath', [
+ pytest.param(wrong_root_drawio, id='wrong_root'),
+ pytest.param(wrong_mxcell_drawio, id='wrong_mxcell'),
+ pytest.param(wrong_mxfile_drawio, id='wrong_mxfile'),
+ pytest.param(wrong_mxgraphmodel_drawio, id='wrong_mxgraphmodel'),
+ pytest.param(not_xml, id='not_xml')
+ ])
+ def test_invalid_schema(self, filepath: str):
+ # GIVEN the mocked file
+ file: UploadFile = get_upload_file(filepath)
+ file.size = 10
+ file.headers = Headers({'content-type': 'application/octet-stream'})
+ # AND the validator
+ validator = DrawioValidator(file)
+
+ # WHEN we validate
+ with pytest.raises(DiagramFileNotValidError) as error_info:
+ validator.validate()
+
+ # THEN the error raised is as expected
+ assert error_info.typename == 'DiagramFileNotValidError'
+ assert error_info.value.message == 'Provided diag_file is not valid. It does not comply with schema'
+
+ @pytest.mark.parametrize('mime, size, filepath', [
+ pytest.param('application/octet-stream', 10, aws_minimal_drawio, id='encoded-tiny-binary'),
+ pytest.param('application/xml', 10 * 1024 * 1024, aws_minimal_xml, id='xml-big-xml'),
+ pytest.param('text/plain', 10 * 1024 * 1024, aws_minimal_drawio, id='encoded-big-text')
+ ])
+ def test_valid_file(self, mime: str, size: int, filepath: str):
+ # GIVEN the mocked file
+ file: UploadFile = get_upload_file(filepath)
+ file.size = size
+ file.headers = Headers({'content-type': mime})
+ # AND the validator
+ validator = DrawioValidator(file)
+
+ # WHEN we validate THEN no CommonError is raised
+ try:
+ validator.validate()
+ except CommonError:
+ assert False
+
+ # AND the filename is an uuid and the original extension
+ match = filename_pattern.match(file.filename)
+ assert match
+
+ @pytest.mark.parametrize('ext', [
+ pytest.param('\x1C\x00d\x08r\x7Fawio\x0D', id='control-chars'),
+ pytest.param(' x\x200\x2Am\x26\x25l\x2F', id='printable-non-alpha'),
+ pytest.param('0d1r2a3w4i5o90', id='numeric')
+ ])
+ def test_filename_sanitized(self, ext: str):
+ # GIVEN the mocked file
+ file: UploadFile = get_upload_file(aws_minimal_drawio)
+ file.size = 10
+ file.filename = f'e.xam.ple.{ext}'
+ file.headers = Headers({'content-type': 'application/xml'})
+ # AND the validator
+ validator = DrawioValidator(file)
+
+ # WHEN we validate
+ validator.validate()
+
+ # THEN the filename is an uuid and a valid extension
+ match = filename_pattern.match(file.filename)
+ assert match
diff --git a/slp_visio/tests/integration/test_visio_processor.py b/slp_visio/tests/integration/test_visio_processor.py
index 44fd9c76..ca2fbb15 100644
--- a/slp_visio/tests/integration/test_visio_processor.py
+++ b/slp_visio/tests/integration/test_visio_processor.py
@@ -14,7 +14,7 @@
expected_visio_aws_with_tz_and_vpc, expected_visio_orphan_dataflows, expected_visio_bidirectional_connectors, \
expected_visio_modified_single_connectors, visio_nested_tzs, expected_visio_nested_tzs, default_visio_mapping, \
visio_nested_tzs_inside_component, expected_visio_nested_tzs_inside_component
-from slp_visio.tests.util.files import file_exists, get_upload_file
+from sl_util.tests.util.file_utils import file_exists, get_upload_file
class TestVisioProcessor: