diff --git a/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/internal/model/TMDocumentModelTest.java b/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/model/TMDocumentModelTest.java similarity index 99% rename from org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/internal/model/TMDocumentModelTest.java rename to org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/model/TMDocumentModelTest.java index b3435a4ed..6224fa6e7 100644 --- a/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/internal/model/TMDocumentModelTest.java +++ b/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/model/TMDocumentModelTest.java @@ -11,7 +11,7 @@ * - Mickael Istria (Red Hat Inc.) * - Sebastian Thomschke (Vegard IT GmbH) - add test cases */ -package org.eclipse.tm4e.ui.internal.model; +package org.eclipse.tm4e.ui.tests.internal.model; import static org.junit.jupiter.api.Assertions.*; @@ -29,6 +29,7 @@ import org.eclipse.tm4e.core.model.Range; import org.eclipse.tm4e.core.registry.IGrammarSource; import org.eclipse.tm4e.core.registry.Registry; +import org.eclipse.tm4e.ui.internal.model.TMDocumentModel; import org.eclipse.tm4e.ui.tests.support.TestUtils; import org.junit.function.ThrowingRunnable; import org.junit.jupiter.api.AfterEach; diff --git a/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/internal/themes/TMTokenProviderTest.java b/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/themes/TMTokenProviderTest.java similarity index 98% rename from org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/internal/themes/TMTokenProviderTest.java rename to org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/themes/TMTokenProviderTest.java index 7ceaa2f71..6906a4c97 100644 --- a/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/internal/themes/TMTokenProviderTest.java +++ 
b/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/themes/TMTokenProviderTest.java
@@ -9,7 +9,7 @@
  * Contributors:
  * Sebastian Thomschke (Vegard IT) - initial implementation
  *******************************************************************************/
-package org.eclipse.tm4e.ui.internal.themes;
+package org.eclipse.tm4e.ui.tests.internal.themes;
 
 import static org.junit.jupiter.api.Assertions.*;
 
@@ -21,6 +21,7 @@
 import org.eclipse.swt.SWT;
 import org.eclipse.tm4e.core.registry.IThemeSource.ContentType;
 import org.eclipse.tm4e.core.theme.RGB;
+import org.eclipse.tm4e.ui.internal.themes.TMThemeTokenProvider;
 import org.eclipse.tm4e.ui.themes.ColorManager;
 import org.junit.jupiter.api.Test;
 
diff --git a/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/utils/DocumentInputStreamTest.java b/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/utils/DocumentInputStreamTest.java
new file mode 100644
index 000000000..de4b3e5d1
--- /dev/null
+++ b/org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/utils/DocumentInputStreamTest.java
@@ -0,0 +1,194 @@
+/*******************************************************************************
+ * Copyright (c) 2024 Sebastian Thomschke and others.
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ * Sebastian Thomschke - initial implementation
+ *******************************************************************************/
+package org.eclipse.tm4e.ui.tests.internal.utils;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.resources.IProject;
+import org.eclipse.core.resources.ResourcesPlugin;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.tm4e.ui.internal.utils.DocumentInputStream;
+import org.eclipse.ui.editors.text.FileDocumentProvider;
+import org.eclipse.ui.part.FileEditorInput;
+import org.eclipse.ui.texteditor.IDocumentProvider;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests {@link DocumentInputStream} with a document that is backed by a workspace file.
+ *
+ * Uses JUnit Jupiter annotations and assertions like the sibling tests of this bundle; the class is
+ * package-private, which JUnit 4 does not accept for test classes.
+ */
+class DocumentInputStreamTest {
+
+	private static final String TEST_ASCII = "Hello, World!";
+
+	// U+1F60A (smiling face emoji), spelled as escapes so the literal does not depend on the source file encoding
+	private static final String EMOJI = "\uD83D\uDE0A";
+	private static final int EMOJI_BYTES_LEN = EMOJI.getBytes(UTF_8).length;
+	// "konnichiwa" in hiragana (5 characters), spelled as escapes for the same reason
+	private static final String JAPANESE = "\u3053\u3093\u306B\u3061\u306F";
+	private static final String TEST_UNICODE = EMOJI + JAPANESE;
+	private static final int TEST_UNICODE_BYTES_LEN = TEST_UNICODE.getBytes(UTF_8).length;
+
+	private final IDocumentProvider documentProvider = new FileDocumentProvider();
+	private IProject project;
+	private FileEditorInput editorInput;
+	private IDocument document;
+
+	/** Creates a workspace project with a file containing {@link #TEST_UNICODE} and connects {@link #document} to it. */
+	@BeforeEach
+	public void setUp() throws CoreException {
+		project = ResourcesPlugin.getWorkspace().getRoot().getProject(getClass().getName() + System.currentTimeMillis());
+		project.create(null);
+		project.open(null);
+
+		final IFile testFile = project.getFile("testfile");
+		// encode explicitly as UTF-8 (the charset the tests assert) instead of relying on the platform default
+		testFile.create(new ByteArrayInputStream(TEST_UNICODE.getBytes(UTF_8)), true, null);
+		editorInput = new FileEditorInput(testFile);
+		documentProvider.connect(editorInput);
+		document = documentProvider.getDocument(editorInput);
+	}
+
+	/** Disconnects the document and deletes the project so that no workspace resources leak into the next test. */
+	@AfterEach
+	public void tearDown() throws CoreException {
+		if (editorInput != null) {
+			documentProvider.disconnect(editorInput);
+		}
+		if (project != null && project.exists()) {
+			project.delete(true, null);
+		}
+	}
+
+	/** Checks {@code available()} before reading, after a partial read and after draining the stream. */
+	@Test
+	public void testAvailable() throws IOException {
+		document.set(TEST_ASCII);
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			assertEquals(TEST_ASCII.length(), is.available());
+			final byte[] buffer = new byte[4];
+			is.read(buffer);
+			assertEquals(TEST_ASCII.length() - 4, is.available());
+			is.readAllBytes();
+			assertEquals(0, is.available());
+		}
+
+		document.set(TEST_UNICODE);
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			assertTrue(is.available() > 0);
+			is.read(new byte[10]);
+			assertTrue(is.available() > 0);
+			is.readAllBytes();
+			assertEquals(0, is.available());
+		}
+	}
+
+	/** Checks that {@code read()} returns -1 after everything has been skipped. */
+	@Test
+	public void testEndOfStream() throws IOException {
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			is.skip(Long.MAX_VALUE);
+			assertEquals(-1, is.read());
+		}
+	}
+
+	/** Reads the stream byte by byte and checks that the bytes decode to the file content. */
+	@Test
+	public void testReadEachByte() throws IOException {
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			final var bytesRead = new ArrayList<Byte>();
+			int b;
+			while ((b = is.read()) != -1) {
+				bytesRead.add((byte) b);
+			}
+
+			final byte[] byteArray = new byte[bytesRead.size()];
+			for (int i = 0; i < bytesRead.size(); i++) {
+				byteArray[i] = bytesRead.get(i);
+			}
+			assertEquals(TEST_UNICODE, new String(byteArray, UTF_8));
+		}
+	}
+
+	/** Reads the stream with a single bulk read and checks that the bytes decode to the file content. */
+	@Test
+	public void testReadIntoByteArray() throws IOException {
+		final byte[] buffer = new byte[1024]; // Buffer to read a portion of the text
+
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			final int bytesRead = is.read(buffer, 0, buffer.length);
+
+			assertEquals(TEST_UNICODE, new String(buffer, 0, bytesRead, UTF_8));
+		}
+	}
+
+	/** Skips the bytes of the leading emoji and checks that the remaining bytes decode to the Japanese text. */
+	@Test
+	public void testSkip() throws IOException {
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			// skip emoji
+			final long skipped = is.skip(EMOJI_BYTES_LEN);
+			assertEquals(EMOJI_BYTES_LEN, skipped);
+
+			final byte[] japanese = new byte[TEST_UNICODE_BYTES_LEN];
+			final int bytesRead = is.read(japanese);
+
+			assertEquals(JAPANESE, new String(japanese, 0, bytesRead, UTF_8));
+		}
+	}
+
+	/** Checks that a high surrogate at the very end of the input is encoded as the replacement character. */
+	@Test
+	public void testHighSurrogateAtEndOfInput() throws IOException {
+		document.set(new String(new char[] { 'A', '\uD800' })); // valid char followed by an isolated high surrogate
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			final byte[] result = is.readAllBytes();
+			final String output = new String(result, UTF_8);
+
+			// the high surrogate at the end should be replaced by the
+			// Unicode replacement char
+			assertEquals("A\uFFFD", output);
+		}
+	}
+
+	/** Checks that a high surrogate followed by a non-surrogate char is encoded as the replacement character. */
+	@Test
+	public void testHighSurrogateWithoutLowSurrogate() throws IOException {
+		document.set(new String(new char[] { '\uD800', 'A' })); // \uD800 is a high surrogate, followed by 'A'
+		try (var is = new DocumentInputStream(document)) {
+			assertEquals(UTF_8, is.getCharset());
+			final byte[] result = is.readAllBytes();
+			final String output = new String(result, UTF_8);
+
+			// the invalid surrogate pair should be replaced by the Unicode replacement char
+			assertEquals("\uFFFD" + "A", output);
+		}
+	}
+}
diff --git a/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/CharsInputStream.java b/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/CharsInputStream.java
index 3ac479758..00a10d550 100644
--- a/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/CharsInputStream.java
+++ b/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/CharsInputStream.java
@@ -26,11 +26,11 @@
 class CharsInputStream extends InputStream {
 
 	@FunctionalInterface
-	interface CharsSupplier {
+	public interface CharsSupplier {
 		char
charAt(int index) throws Exception; } - enum EncoderState { + private enum EncoderState { ENCODING, FLUSHING, DONE @@ -100,6 +100,10 @@ public int available() { return remaining == 0 ? charsLength.getAsInt() - charIndex : remaining; } + public Charset getCharset() { + return encoder.charset(); + } + private boolean flushEncoder() throws IOException { if (encoderState == EncoderState.DONE) return false; diff --git a/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/DocumentInputStream.java b/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/DocumentInputStream.java index c5934133a..de84fd40f 100644 --- a/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/DocumentInputStream.java +++ b/org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/DocumentInputStream.java @@ -20,7 +20,7 @@ import org.eclipse.jface.text.IDocument; import org.eclipse.tm4e.ui.TMUIPlugin; -final class DocumentInputStream extends CharsInputStream { +public final class DocumentInputStream extends CharsInputStream { private static @Nullable Charset getCharset(final IDocument document) { final ITextFileBufferManager bufferManager = FileBuffers.getTextFileBufferManager(); @@ -39,7 +39,7 @@ final class DocumentInputStream extends CharsInputStream { return null; } - DocumentInputStream(final IDocument doc) { + public DocumentInputStream(final IDocument doc) { super(doc::getChar, doc::getLength, getCharset(doc)); } } diff --git a/pom.xml b/pom.xml index 10dfdf365..ce8a96a3d 100644 --- a/pom.xml +++ b/pom.xml @@ -305,7 +305,7 @@ true true - -Xms512m -Xmx512m ${tycho-surefire-plugin.vmargs} + -Xms512m -Xmx512m -Dfile.encoding=${project.build.sourceEncoding} ${tycho-surefire-plugin.vmargs}