From fbb243a5002cab03d2c848d2d5f94520e3e52763 Mon Sep 17 00:00:00 2001 From: Simon Brown Date: Tue, 19 Sep 2023 11:40:01 +0100 Subject: [PATCH] Adds a way to set the character encoding used by the DSL parser (see https://github.com/structurizr/dsl/issues/338). --- docs/changelog.md | 1 + .../structurizr/dsl/StructurizrDslParser.java | 17 ++++++++++++++++- src/test/dsl/iso-8859.dsl | 5 +++++ src/test/java/com/structurizr/dsl/DslTests.java | 9 +++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 src/test/dsl/iso-8859.dsl diff --git a/docs/changelog.md b/docs/changelog.md index 445cb7a..534ae89 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -5,6 +5,7 @@ - DSL identifiers (if present) will now be loaded when extending a JSON workspace (see https://github.com/structurizr/dsl/discussions/328). - Adds a `context` variable to inline/external scripts (see https://github.com/structurizr/dsl/issues/332). - Fixes https://github.com/structurizr/dsl/issues/324 (Groups with no curly braces breaks diagrams). +- Adds a way to set the character encoding used by the DSL parser (see https://github.com/structurizr/dsl/issues/338). ## 1.32.0 (28th July 2023) diff --git a/src/main/java/com/structurizr/dsl/StructurizrDslParser.java b/src/main/java/com/structurizr/dsl/StructurizrDslParser.java index 98ebe7b..ea678c9 100644 --- a/src/main/java/com/structurizr/dsl/StructurizrDslParser.java +++ b/src/main/java/com/structurizr/dsl/StructurizrDslParser.java @@ -7,6 +7,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.*; @@ -32,6 +33,7 @@ public final class StructurizrDslParser extends StructurizrDslTokens { private static final String STRUCTURIZR_DSL_IDENTIFIER_PROPERTY_NAME = "structurizr.dsl.identifier"; + private Charset characterEncoding = StandardCharsets.UTF_8; private IdentifierScope identifierScope = IdentifierScope.Flat; private Stack contextStack; private Set parsedTokens = new HashSet<>(); @@ -53,6 +55,19 @@ public StructurizrDslParser() { constants = new HashMap<>(); } + /** + * Provides a way to change the character encoding used by the DSL parser. + * + * @param characterEncoding a Charset instance + */ + public void setCharacterEncoding(Charset characterEncoding) { + if (characterEncoding == null) { + throw new IllegalArgumentException("A character encoding must be specified"); + } + + this.characterEncoding = characterEncoding; + } + IdentifierScope getIdentifierScope() { return identifierScope; } @@ -125,7 +140,7 @@ public void parse(File path) throws StructurizrDslParserException { List files = FileUtils.findFiles(path); try { for (File file : files) { - parse(Files.readAllLines(file.toPath(), StandardCharsets.UTF_8), file); + parse(Files.readAllLines(file.toPath(), characterEncoding), file); } } catch (IOException e) { throw new StructurizrDslParserException(e.getMessage()); diff --git a/src/test/dsl/iso-8859.dsl b/src/test/dsl/iso-8859.dsl new file mode 100644 index 0000000..caad37c --- /dev/null +++ b/src/test/dsl/iso-8859.dsl @@ -0,0 +1,5 @@ +workspace { + model { + softwareSystem "Namé" + } +} \ No newline at end of file diff --git a/src/test/java/com/structurizr/dsl/DslTests.java b/src/test/java/com/structurizr/dsl/DslTests.java index 27bc9a2..321db56 100644 --- a/src/test/java/com/structurizr/dsl/DslTests.java +++ b/src/test/java/com/structurizr/dsl/DslTests.java @@ -8,6 +8,7 @@ import org.junit.jupiter.params.provider.ValueSource; import java.io.File; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; import java.util.Base64; @@ -1029,4 +1030,12 @@ void test_GroupWithoutBrace() throws Exception { } } + @Test + void test_ISO8859Encoding() throws Exception { + StructurizrDslParser parser = new StructurizrDslParser(); + parser.setCharacterEncoding(StandardCharsets.ISO_8859_1); + parser.parse(new File("src/test/dsl/iso-8859.dsl")); + assertNotNull(parser.getWorkspace().getModel().getSoftwareSystemWithName("Namé")); + } + } \ No newline at end of file