diff --git a/setup.py b/setup.py index b3e498ab..41894e3e 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ 'networkx==3.1', 'dependency-injector==4.41.0', 'google-re2==1.0', + 'xmlschema==2.5.0', # Do not upgrade pygraphviz unless security issues because it is heavily dependent on the underlying OS 'pygraphviz==1.10' ], diff --git a/sl_util/sl_util/secure_regex.py b/sl_util/sl_util/secure_regex.py index 64a7a307..77b0b229 100644 --- a/sl_util/sl_util/secure_regex.py +++ b/sl_util/sl_util/secure_regex.py @@ -19,3 +19,7 @@ def findall(regex, string, options=None): def split(pattern, text, maxsplit=0, options=None): return re2.split(pattern, text, maxsplit, options) + + +def compile(pattern, options=None): + return re2.compile(pattern, options) diff --git a/sl_util/tests/util/__init__.py b/sl_util/tests/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/slp_visio/tests/util/files.py b/sl_util/tests/util/file_utils.py similarity index 100% rename from slp_visio/tests/util/files.py rename to sl_util/tests/util/file_utils.py diff --git a/slp_base/slp_base/provider_type.py b/slp_base/slp_base/provider_type.py index 8ae374d8..202f9c2a 100644 --- a/slp_base/slp_base/provider_type.py +++ b/slp_base/slp_base/provider_type.py @@ -1,25 +1,31 @@ from otm.otm.entity.representation import RepresentationType from otm.otm.provider import Provider +application_json = 'application/json' +text_plain = 'text/plain' +application_octet_stream = 'application/octet-stream' +application_xml = 'application/xml' + class IacType(str, Provider): CLOUDFORMATION = ("CLOUDFORMATION", "CloudFormation", RepresentationType.CODE, - ['application/json', 'text/yaml', 'text/plain', 'application/octet-stream']) + [application_json, 'text/yaml', text_plain, application_octet_stream]) TERRAFORM = ("TERRAFORM", "Terraform", RepresentationType.CODE, - ['text/plain', 'application/octet-stream', 'application/json']) + [text_plain, application_octet_stream, application_json]) TFPLAN 
= ("TFPLAN", "Terraform Plan", RepresentationType.CODE, - ['text/plain', 'application/json', 'application/msword', 'text/vnd.graphviz', 'application/octet-stream']) + [text_plain, application_json, 'application/msword', 'text/vnd.graphviz', application_octet_stream]) class DiagramType(str, Provider): VISIO = ("VISIO", "Visio", RepresentationType.DIAGRAM, - ['application/vnd.ms-visio.drawing.main+xml', 'application/octet-stream']) + ['application/vnd.ms-visio.drawing.main+xml', application_octet_stream]) LUCID = ("LUCID", "Lucidchart", RepresentationType.DIAGRAM, - ['application/vnd.ms-visio.drawing.main+xml', 'application/octet-stream', 'application/zip']) - # DRAWIO = ("DRAWIO", "Drawio", RepresentationType.DIAGRAM, - # ['application/octet-stream', 'application/xml', 'text/plain']) + ['application/vnd.ms-visio.drawing.main+xml', application_octet_stream, 'application/zip']) + DRAWIO = ("DRAWIO", "Drawio", RepresentationType.DIAGRAM, + [application_octet_stream, application_xml, text_plain]) class EtmType(str, Provider): + MTMT = ("MTMT", "Microsoft Threat Modeling Tool", RepresentationType.THREAT_MODEL, - ['application/octet-stream', 'application/xml', 'text/plain']) + [application_octet_stream, application_xml, text_plain]) diff --git a/slp_base/slp_base/provider_validator.py b/slp_base/slp_base/provider_validator.py index 504017cf..7b59908d 100644 --- a/slp_base/slp_base/provider_validator.py +++ b/slp_base/slp_base/provider_validator.py @@ -14,6 +14,11 @@ def generate_size_error(provider: Provider, source_file_name: str, exception=Sou message=f'Provided {source_file_name} is not valid. Invalid size') +def generate_schema_error(provider: Provider, source_file_name: str, exception=SourceFileNotValidError): + return exception(title=f'{provider.provider_name} file is not valid', + message=f'Provided {source_file_name} is not valid. 
It does not comply with schema') + + class ProviderValidator(metaclass=abc.ABCMeta): """ Formal Interface to validate the provider source data diff --git a/slp_drawio/resources/schemas/drawio_schema.xsd b/slp_drawio/resources/schemas/drawio_schema.xsd new file mode 100644 index 00000000..7829f011 --- /dev/null +++ b/slp_drawio/resources/schemas/drawio_schema.xsd @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/slp_drawio/slp_drawio/drawio_processor.py b/slp_drawio/slp_drawio/drawio_processor.py index ad3ee62e..d309108b 100644 --- a/slp_drawio/slp_drawio/drawio_processor.py +++ b/slp_drawio/slp_drawio/drawio_processor.py @@ -1,5 +1,4 @@ -from slp_base import OTMProcessor, ProviderValidator, ProviderLoader, MappingValidator, MappingLoader, ProviderParser, \ - DiagramType +from slp_base import OTMProcessor, ProviderValidator, ProviderLoader, MappingValidator, MappingLoader, ProviderParser from slp_drawio.slp_drawio.load.drawio_loader import DrawioLoader from slp_drawio.slp_drawio.load.drawio_mapping_file_loader import DrawioMappingFileLoader from slp_drawio.slp_drawio.parse.drawio_parser import DrawioParser diff --git a/slp_drawio/slp_drawio/parse/diagram_mapper.py b/slp_drawio/slp_drawio/parse/diagram_mapper.py index dce2f9ed..8a5a9ac5 100644 --- a/slp_drawio/slp_drawio/parse/diagram_mapper.py +++ b/slp_drawio/slp_drawio/parse/diagram_mapper.py @@ -1,11 +1,13 @@ from slp_drawio.slp_drawio.load.drawio_mapping_file_loader import DrawioMapping -from slp_drawio.slp_drawio.objects.diagram_objects import Diagram +from slp_drawio.slp_drawio.objects.diagram_objects import Diagram, DiagramRepresentation class DiagramMapper: - def __init__(self, diagram: Diagram, mapping: DrawioMapping): + def __init__(self, project_id: str, diagram: Diagram, mapping: DrawioMapping): + self.project_id = 
project_id self.diagram = diagram self.mapping = mapping + self.size = {'width': 1000, 'height': 1000} def map(self): - pass + self.diagram.representation = DiagramRepresentation(self.project_id, self.size) diff --git a/slp_drawio/slp_drawio/parse/drawio_parser.py b/slp_drawio/slp_drawio/parse/drawio_parser.py index 13751f4f..14f8d1e1 100644 --- a/slp_drawio/slp_drawio/parse/drawio_parser.py +++ b/slp_drawio/slp_drawio/parse/drawio_parser.py @@ -29,10 +29,15 @@ def build_otm(self) -> OTM: return otm def map_components_and_trustzones(self): - DiagramMapper(self.diagram, self.mapping).map() + DiagramMapper(self.project_id, self.diagram, self.mapping).map() def __build_otm(self): - # TODO waiting the parser implementation - pass + otm = OTMBuilder(self.project_id, self.project_name, DiagramType.DRAWIO).build() + otm.representations = [self.diagram.representation.otm] + otm.components = [c.otm for c in self.diagram.components] + otm.dataflows = [d.otm for d in self.diagram.dataflows] + otm.trustzones = [t.otm for t in self.diagram.trustzones] + + return otm diff --git a/slp_drawio/slp_drawio/validate/drawio_validator.py b/slp_drawio/slp_drawio/validate/drawio_validator.py index 84941720..ddb0482d 100644 --- a/slp_drawio/slp_drawio/validate/drawio_validator.py +++ b/slp_drawio/slp_drawio/validate/drawio_validator.py @@ -1,23 +1,57 @@ +import copy import logging +import os +import string +import uuid -from slp_base import ProviderValidator +import xmlschema + +from slp_base import ProviderValidator, DiagramFileNotValidError, DiagramType +from slp_base.slp_base.provider_validator import generate_size_error, generate_content_type_error, generate_schema_error logger = logging.getLogger(__name__) +MAX_SIZE = 10 * 1024 * 1024 +MIN_SIZE = 10 + +path = os.path.dirname(__file__) + class DrawioValidator(ProviderValidator): def __init__(self, data): super(DrawioValidator, self).__init__() self.data = data + self.provider = DiagramType.DRAWIO + self.xsd_schema = 
f'{path}/../../resources/schemas/drawio_schema.xsd' def validate(self): logger.info('Validating Drawio file') self.__validate_size() self.__validate_content_type() + self.__sanitize_name() + self.__validate_schema() def __validate_size(self): - pass + size = self.data.size + if size > MAX_SIZE or size < MIN_SIZE: + raise generate_size_error(self.provider, 'diag_file', DiagramFileNotValidError) def __validate_content_type(self): - pass + mime = self.data.content_type + if mime not in self.provider.valid_mime: + raise generate_content_type_error(self.provider, 'diag_file', DiagramFileNotValidError) + + def __sanitize_name(self): + ext = self.data.filename.split('.')[-1] + ext = "".join([c for c in ext if c in string.ascii_letters]) + name = str(uuid.uuid4()) + self.data.filename = f'{name}.{ext}' + + def __validate_schema(self): + schema = xmlschema.XMLSchema(self.xsd_schema) + try: + file_copy = copy.deepcopy(self.data.file) + schema.validate(file_copy) + except Exception as error: + raise generate_schema_error(self.provider, 'diag_file', DiagramFileNotValidError) from error diff --git a/slp_drawio/tests/resources/drawio/not_xml.drawio b/slp_drawio/tests/resources/drawio/not_xml.drawio new file mode 100644 index 00000000..393c4e9b --- /dev/null +++ b/slp_drawio/tests/resources/drawio/not_xml.drawio @@ -0,0 +1,2 @@ +WRONG_CONTENT +NOT XML \ No newline at end of file diff --git a/slp_drawio/tests/resources/drawio/wrong_mxcell.drawio b/slp_drawio/tests/resources/drawio/wrong_mxcell.drawio new file mode 100644 index 00000000..f0b6fae0 --- /dev/null +++ b/slp_drawio/tests/resources/drawio/wrong_mxcell.drawio @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/slp_drawio/tests/resources/drawio/wrong_mxfile.drawio b/slp_drawio/tests/resources/drawio/wrong_mxfile.drawio new file mode 100644 index 00000000..e80bb1b2 --- /dev/null +++ b/slp_drawio/tests/resources/drawio/wrong_mxfile.drawio @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio b/slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio new file mode 100644 index 00000000..99d1c6a9 --- /dev/null +++ b/slp_drawio/tests/resources/drawio/wrong_mxgraphmodel.drawio @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/slp_drawio/tests/resources/drawio/wrong_root.drawio b/slp_drawio/tests/resources/drawio/wrong_root.drawio new file mode 100644 index 00000000..11985a3b --- /dev/null +++ b/slp_drawio/tests/resources/drawio/wrong_root.drawio @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/slp_drawio/tests/resources/test_resource_paths.py b/slp_drawio/tests/resources/test_resource_paths.py index f12eccef..09724908 100644 --- a/slp_drawio/tests/resources/test_resource_paths.py +++ b/slp_drawio/tests/resources/test_resource_paths.py @@ -7,3 +7,8 @@ aws_minimal_xml = f'{drawio}/aws_minimal.drawio.xml' aws_minimal_drawio = f'{drawio}/aws_minimal.drawio' aws_minimal_drawio_as_json = f'{drawio}/aws_minimal_source.json' +wrong_mxcell_drawio = f'{drawio}/wrong_mxcell.drawio' +wrong_mxfile_drawio = f'{drawio}/wrong_mxfile.drawio' +wrong_mxgraphmodel_drawio = f'{drawio}/wrong_mxgraphmodel.drawio' +wrong_root_drawio = f'{drawio}/wrong_root.drawio' +not_xml = f'{drawio}/not_xml.drawio' diff --git a/slp_drawio/tests/validate/__init__.py b/slp_drawio/tests/validate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/slp_drawio/tests/validate/test_drawio_validator.py b/slp_drawio/tests/validate/test_drawio_validator.py new file mode 100644 index 00000000..b30e511b --- /dev/null +++ b/slp_drawio/tests/validate/test_drawio_validator.py @@ -0,0 +1,116 @@ +from unittest.mock import Mock + +import pytest +from starlette.datastructures import UploadFile, Headers + +from sl_util.sl_util import secure_regex as re +from sl_util.tests.util.file_utils import get_upload_file + +from slp_base import DiagramFileNotValidError, CommonError +from 
slp_drawio.slp_drawio.validate.drawio_validator import DrawioValidator +from slp_drawio.tests.resources.test_resource_paths import wrong_mxgraphmodel_drawio, wrong_mxfile_drawio, \ + wrong_mxcell_drawio, wrong_root_drawio, aws_minimal_drawio, aws_minimal_xml, not_xml + + +filename_pattern = re.compile('^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}\\.(drawio|xml)$') + + +class TestDrawioValidator: + + @pytest.mark.parametrize('size_value', [ + pytest.param(9.999, id="less than expected min"), + pytest.param(10.001 * 1024 * 1024, id="more than expected max") + ]) + def test_invalid_sizes(self, size_value): + # GIVEN the validator with an invalid size + validator = DrawioValidator(Mock(size=size_value)) + + # WHEN we validate + with pytest.raises(DiagramFileNotValidError) as error_info: + validator.validate() + + # THEN the error raised is as expected + assert error_info.typename == 'DiagramFileNotValidError' + assert error_info.value.message == 'Provided diag_file is not valid. 
Invalid size' + + @pytest.mark.parametrize('mime', [ + 'application/zip', 'application/pdf', 'text/html' + ]) + def test_invalid_mimetype(self, mime): + # GIVEN the validator with a valid size but an invalid content type + validator = DrawioValidator(Mock(content_type=mime, size=10)) + + # WHEN we validate + with pytest.raises(DiagramFileNotValidError) as error_info: + validator.validate() + + # THEN the error raised is as expected + assert error_info.typename == 'DiagramFileNotValidError' + assert error_info.value.message == 'Invalid content type for diag_file' + + @pytest.mark.parametrize('filepath', [ + pytest.param(wrong_root_drawio, id='wrong_root'), + pytest.param(wrong_mxcell_drawio, id='wrong_mxcell'), + pytest.param(wrong_mxfile_drawio, id='wrong_mxfile'), + pytest.param(wrong_mxgraphmodel_drawio, id='wrong_mxgraphmodel'), + pytest.param(not_xml, id='not_xml') + ]) + def test_invalid_schema(self, filepath: str): + # GIVEN the mocked file + file: UploadFile = get_upload_file(filepath) + file.size = 10 + file.headers = Headers({'content-type': 'application/octet-stream'}) + # AND the validator + validator = DrawioValidator(file) + + # WHEN we validate + with pytest.raises(DiagramFileNotValidError) as error_info: + validator.validate() + + # THEN the error raised is as expected + assert error_info.typename == 'DiagramFileNotValidError' + assert error_info.value.message == 'Provided diag_file is not valid. 
It does not comply with schema' + + @pytest.mark.parametrize('mime, size, filepath', [ + pytest.param('application/octet-stream', 10, aws_minimal_drawio, id='encoded-tiny-binary'), + pytest.param('application/xml', 10 * 1024 * 1024, aws_minimal_xml, id='xml-big-xml'), + pytest.param('text/plain', 10 * 1024 * 1024, aws_minimal_drawio, id='encoded-big-text') + ]) + def test_valid_file(self, mime: str, size: int, filepath: str): + # GIVEN the mocked file + file: UploadFile = get_upload_file(filepath) + file.size = size + file.headers = Headers({'content-type': mime}) + # AND the validator + validator = DrawioValidator(file) + + # WHEN we validate THEN no CommonError is raised + try: + validator.validate() + except CommonError: + assert False + + # AND the filename is an uuid and the original extension + match = filename_pattern.match(file.filename) + assert match + + @pytest.mark.parametrize('ext', [ + pytest.param('\x1C\x00d\x08r\x7Fawio\x0D', id='control-chars'), + pytest.param(' x\x200\x2Am\x26\x25l\x2F', id='printable-non-alpha'), + pytest.param('0d1r2a3w4i5o90', id='numeric') + ]) + def test_filename_sanitized(self, ext: str): + # GIVEN the mocked file + file: UploadFile = get_upload_file(aws_minimal_drawio) + file.size = 10 + file.filename = f'e.xam.ple.{ext}' + file.headers = Headers({'content-type': 'application/xml'}) + # AND the validator + validator = DrawioValidator(file) + + # WHEN we validate + validator.validate() + + # THEN the filename is an uuid and a valid extension + match = filename_pattern.match(file.filename) + assert match diff --git a/slp_visio/tests/integration/test_visio_processor.py b/slp_visio/tests/integration/test_visio_processor.py index 44fd9c76..ca2fbb15 100644 --- a/slp_visio/tests/integration/test_visio_processor.py +++ b/slp_visio/tests/integration/test_visio_processor.py @@ -14,7 +14,7 @@ expected_visio_aws_with_tz_and_vpc, expected_visio_orphan_dataflows, expected_visio_bidirectional_connectors, \ 
expected_visio_modified_single_connectors, visio_nested_tzs, expected_visio_nested_tzs, default_visio_mapping, \ visio_nested_tzs_inside_component, expected_visio_nested_tzs_inside_component -from slp_visio.tests.util.files import file_exists, get_upload_file +from sl_util.tests.util.file_utils import file_exists, get_upload_file class TestVisioProcessor: