diff --git a/htmlSanityCheck-cli/src/main/groovy/org/aim42/htmlsanitycheck/cli/HscCommand.groovy b/htmlSanityCheck-cli/src/main/groovy/org/aim42/htmlsanitycheck/cli/HscCommand.groovy index c2147080..80a1a79c 100644 --- a/htmlSanityCheck-cli/src/main/groovy/org/aim42/htmlsanitycheck/cli/HscCommand.groovy +++ b/htmlSanityCheck-cli/src/main/groovy/org/aim42/htmlsanitycheck/cli/HscCommand.groovy @@ -92,6 +92,10 @@ class HscCommand implements Runnable { @Option(names = ["-e", "--exclude"], description = "Exclude remote patterns to check", split = ',') Pattern[] excludes = [] + @Option(names = ["-o", "--junitOutputStyle"], + description = "JUnit output style: FLAT (all files in one directory, default) or HIERARCHICAL (mirrors source structure)") + Configuration.JunitOutputStyle junitOutputStyle + @Parameters(index = "0", arity = "0..1", description = "base directory (default: current directory)") File srcDir = new File(".").getAbsoluteFile() @@ -177,6 +181,7 @@ class HscCommand implements Runnable { .checkingResultsDir(resultsDirectory) .checksToExecute(AllCheckers.CHECKER_CLASSES) .excludes(hscCommand.excludes as Set) + .junitOutputStyle(hscCommand.junitOutputStyle) .build() // if we have no valid configuration, abort with exception diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/AllChecksRunner.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/AllChecksRunner.java index b58ad59a..4e3554d7 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/AllChecksRunner.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/AllChecksRunner.java @@ -53,6 +53,9 @@ public class AllChecksRunner { // keep all results private final PerRunResults resultsForAllPages; + // configuration (needed for junit output style) + private final Configuration configuration; + private static final Logger logger = LoggerFactory.getLogger(AllChecksRunner.class); /** @@ -62,6 +65,7 @@ public class AllChecksRunner { public 
AllChecksRunner(Configuration configuration) { super(); + this.configuration = configuration; this.filesToCheck = configuration.getSourceDocuments(); // TODO: #185 (checker classes shall be detected automatically (aka CheckerFactory) @@ -175,7 +179,8 @@ private void reportCheckingResultsAsHTML(String resultsDir) { * Report results in JUnit XML */ private void reportCheckingResultsAsJUnitXml(String resultsDir) { - Reporter reporter = new JUnitXmlReporter(resultsForAllPages, resultsDir); + Reporter reporter = new JUnitXmlReporter(resultsForAllPages, resultsDir, + configuration.getJunitOutputStyle()); reporter.reportFindings(); } } \ No newline at end of file diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java index f9d4a627..ab1b1909 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/Configuration.java @@ -32,6 +32,41 @@ @ToString @Slf4j public class Configuration { + + /** + * Defines the output style for JUnit XML reports. + *
<p>
+ * This configuration option controls how JUnit XML report files are organized + * in the output directory. + * + * @since 2.0.0 + */ + public enum JunitOutputStyle { + /** + * Flat file structure where all JUnit XML reports are stored in a single directory. + * The entire file path is encoded into the filename using underscores. + *
<p>
+ * Example: {@code build/test-results/htmlchecks/TEST-unit-html-_docs_guide_installation.xml} + *
<p>
+ * This is the default for backwards compatibility, but may fail with + * "File name too long" errors for deeply nested directory structures. + */ + FLAT, + + /** + * Hierarchical directory structure where JUnit XML reports are organized + * in subdirectories that mirror the source file structure. + *
<p>
+ * Example: {@code build/test-results/htmlchecks/docs/guide/TEST-installation.xml} + *
<p>
+ * This avoids filename length issues and provides more intuitive organization. + * Recommended for projects with deeply nested directory structures. + * + * @see Issue 405 + */ + HIERARCHICAL + } + Set sourceDocuments; File sourceDir; File checkingResultsDir; @@ -52,6 +87,8 @@ public class Configuration { Set excludes = new HashSet<>(); @Builder.Default Set indexFilenames = defaultIndeFilenames(); + @Builder.Default + JunitOutputStyle junitOutputStyle = JunitOutputStyle.FLAT; /* * Explanation for configuring http status codes: @@ -79,6 +116,7 @@ public Configuration() { this.indexFilenames = defaultIndeFilenames(); this.prefixOnlyHrefExtensions = Web.POSSIBLE_EXTENSIONS; + this.junitOutputStyle = JunitOutputStyle.FLAT;// FLAT for backwards compatibility this.checksToExecute = AllCheckers.CHECKER_CLASSES; } diff --git a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/report/JUnitXmlReporter.java b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/report/JUnitXmlReporter.java index 523f4e61..ea5afd9c 100644 --- a/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/report/JUnitXmlReporter.java +++ b/htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/report/JUnitXmlReporter.java @@ -1,5 +1,6 @@ package org.aim42.htmlsanitycheck.report; +import org.aim42.htmlsanitycheck.Configuration; import org.aim42.htmlsanitycheck.collect.Finding; import org.aim42.htmlsanitycheck.collect.PerRunResults; import org.aim42.htmlsanitycheck.collect.SingleCheckResults; @@ -11,6 +12,7 @@ import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.nio.file.Path; import java.util.UUID; /************************************************************************ @@ -36,13 +38,25 @@ /** * Write the findings' report to JUnit XML. Allows tools processing JUnit to * include the findings. + *
<p>
+ * Supports two output styles: + *
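<ul> + * <li>FLAT: all reports are written to a single directory, with the source path encoded into the filename (default)</li> + * <li>HIERARCHICAL: reports are written to subdirectories that mirror the source file structure</li> + * </ul>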
*/ public class JUnitXmlReporter extends Reporter { File outputPath; + Configuration.JunitOutputStyle outputStyle; public JUnitXmlReporter(PerRunResults runResults, String outputPath) { + this(runResults, outputPath, Configuration.JunitOutputStyle.FLAT); + } + + public JUnitXmlReporter(PerRunResults runResults, String outputPath, Configuration.JunitOutputStyle outputStyle) { super(runResults); this.outputPath = new File(outputPath); + this.outputStyle = outputStyle != null ? outputStyle : Configuration.JunitOutputStyle.FLAT; } @Override @@ -52,11 +66,15 @@ protected void initReport() { } } + // tag::reportPageSummary[] @Override protected void reportPageSummary(SinglePageResults singlePageResults) { String name = filenameOrTitleOrRandom(singlePageResults); - String sanitizedPath = name.replaceAll("[^A-Za-z0-9_-]+", "_"); - File testOutputFile = new File(outputPath, "TEST-unit-html-" + sanitizedPath + ".xml"); + + File testOutputFile = (outputStyle == Configuration.JunitOutputStyle.HIERARCHICAL) + ? getHierarchicalOutputFile(name) + : getFlatOutputFile(name); + // end::reportPageSummary[] XMLOutputFactory factory = XMLOutputFactory.newInstance(); try (FileWriter fileWriter = new FileWriter(testOutputFile)) { @@ -96,6 +114,73 @@ protected void reportPageSummary(SinglePageResults singlePageResults) { } } + /** + * Creates output file using flat structure (all files in one directory). + * Encodes the full path into the filename using underscores. + * + * @param name The source file path + * @return The output file for the JUnit XML report + */ + private File getFlatOutputFile(String name) { + String sanitizedPath = name.replaceAll("[^A-Za-z0-9_-]+", "_"); + return new File(outputPath, "TEST-unit-html-" + sanitizedPath + ".xml"); + } + + /** + * Creates output file using hierarchical structure (subdirectories mirror source structure). + * Solves filename length issues with deeply nested directories. 
+ * + * @param name The source file path + * @return The output file for the JUnit XML report + */ + private File getHierarchicalOutputFile(String name) { + // Parse the path to extract directory structure and filename + File sourcePath = new File(name); + File parentDir = sourcePath.getParentFile(); + String fileName = sourcePath.getName(); + + // Create directory structure under outputPath to mirror the source file hierarchy + File testOutputDir; + if (parentDir != null) { + // Normalize the path to handle relative references like ".." + // This ensures we stay within the outputPath and don't try to escape it + try { + File tempPath = new File(outputPath, parentDir.getPath()); + testOutputDir = tempPath.getCanonicalFile(); + + // Verify the canonical path is still under outputPath using NIO Path API + // This provides better security against path traversal attacks + Path normalizedOutputPath = outputPath.getCanonicalFile().toPath().normalize(); + Path normalizedTestOutputDir = testOutputDir.toPath().normalize(); + + if (!normalizedTestOutputDir.startsWith(normalizedOutputPath)) { + // Path tries to escape outputPath, so just use outputPath directly + testOutputDir = outputPath; + } + } catch (Exception e) { + // If normalization fails, fall back to outputPath + testOutputDir = outputPath; + } + } else { + testOutputDir = outputPath; + } + + // Ensure the directory exists + if (!testOutputDir.exists() && !testOutputDir.mkdirs()) { + StringBuilder errorMsg = new StringBuilder("Cannot create directory: ") + .append(testOutputDir.getAbsolutePath()); + errorMsg.append(" (exists: ").append(testOutputDir.exists()) + .append(", parent canWrite: ") + .append(testOutputDir.getParentFile() != null ? 
testOutputDir.getParentFile().canWrite() : "unknown") + .append(")"); + throw new RuntimeException(errorMsg.toString()); //NOSONAR(S112) + } + + // Create the test file with a simple, sanitized filename + String sanitizedFileName = fileName.replaceAll("[^A-Za-z0-9_.-]+", "_"); + return new File(testOutputDir, "TEST-" + sanitizedFileName + ".xml"); + } + private static String filenameOrTitleOrRandom(SinglePageResults pageResult) { if (pageResult.getPageFilePath() != null) { return pageResult.getPageFilePath(); diff --git a/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/report/JUnitXmlReporterTest.groovy b/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/report/JUnitXmlReporterTest.groovy index 6b086dbb..82de6741 100644 --- a/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/report/JUnitXmlReporterTest.groovy +++ b/htmlSanityCheck-core/src/test/groovy/org/aim42/htmlsanitycheck/report/JUnitXmlReporterTest.groovy @@ -1,5 +1,6 @@ package org.aim42.htmlsanitycheck.report +import org.aim42.htmlsanitycheck.Configuration import org.aim42.htmlsanitycheck.collect.Finding import org.aim42.htmlsanitycheck.collect.PerRunResults import org.aim42.htmlsanitycheck.collect.SingleCheckResults @@ -9,6 +10,7 @@ import org.junit.Before import org.junit.Test import static org.junit.Assert.assertEquals +import static org.junit.Assert.assertNotNull import static org.junit.Assert.assertTrue // see end-of-file for license information @@ -42,23 +44,27 @@ class JUnitXmlReporterTest { void tearDown() { if (outputPath) { outputPath.traverse { - System.err.println "${it}: ${it.text}" + if (it.isFile()) { + System.err.println "${it}: ${it.text}" + } else { + System.err.println "${it}: [directory]" + } } } outputPath?.deleteDir() } @Test(expected = RuntimeException.class) - void testInitReportWithNonWritableDirectory() throws IOException { - // Create a temporary directory - File tempDir = tempFolder.newFolder() + void 
testInitReportWithNonWritableDirectory() { + // Create a path that cannot be created (using a non-existent parent and restricted path) + File nonExistentPath = new File("/nonexistent/path/that/cannot/be/created") - // Make the directory non-writable - assertTrue("Could not make temp directory non-writable", tempDir.setWritable(false)) - - // Create a new JUnitXmlReporter with the non-writable directory + // Try to create a JUnitXmlReporter with a path that cannot be created PerRunResults runResults = new PerRunResults() - new JUnitXmlReporter(runResults, tempDir.getAbsolutePath()).initReport() + JUnitXmlReporter reporter = new JUnitXmlReporter(runResults, nonExistentPath.getAbsolutePath()) + + // This should throw RuntimeException because the path cannot be created + reporter.initReport() } @Test @@ -89,7 +95,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("Zero checks expected", "0", testsuite.@tests.text()) assertEquals("Zero findings expected", "0", testsuite.@failures.text()) assertEquals("Zero testcases expected", 1, testsuite.testcase.size()) @@ -102,7 +108,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("expected no check", "0", testsuite.@tests.text()) assertEquals("expected one finding", "1", testsuite.@failures.text()) assertEquals("One testcase expected", 1, testsuite.testcase.size()) @@ -118,7 +124,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new 
XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("Expect one finding", "1", testsuite.@failures.text()) assertEquals("Expect one check", "1", testsuite.@tests.text()) assertEquals("One testcase expected", 1, testsuite.testcase.size()) @@ -134,7 +140,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("Expect one finding", "1", testsuite.@failures.text()) assertEquals("Expect ten checks", "10", testsuite.@tests.text()) assertEquals("Expect one testcase", 1, testsuite.testcase.size()) @@ -152,7 +158,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("Expect three findings", "3", testsuite.@failures.text()) assertEquals("Expect ten checks", "10", testsuite.@tests.text()) assertEquals("Expect one testcases", 1, testsuite.testcase.size()) @@ -168,7 +174,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("Expect one finding", "1", testsuite.@failures.text()) assertEquals("Expect six checks", "6", testsuite.@tests.text()) assertEquals("Expect one testcases", 1, testsuite.testcase.size()) @@ -188,7 +194,7 @@ class JUnitXmlReporterTest { addSingleCheckResultsToReporter( singleCheckResults ) reporter.reportFindings() - def testsuite = new XmlSlurper().parse(outputPath.listFiles()[0]) + def testsuite = new XmlSlurper().parse(findFirstXmlFile(outputPath)) assertEquals("Expect $nrOfFindings findings", nrOfFindings as String, 
testsuite.@failures.text() ) assertEquals("Expect $nrOfChecks checks", nrOfChecks as String, testsuite.@tests.text() ) assertEquals("Expect one testcase", 1, testsuite.testcase.size()) @@ -201,4 +207,459 @@ class JUnitXmlReporterTest { spr.addResultsForSingleCheck( scr ) reporter.addCheckingResultsForOnePage( spr ) } + + // Helper method to find XML files recursively in a directory + private File findFirstXmlFile(File dir) { + File[] files = dir.listFiles() + if (files == null) return null + + // First look for XML files in current directory + for (File file : files) { + if (file.isFile() && file.name.endsWith('.xml')) { + return file + } + } + + // Then recurse into subdirectories + for (File file : files) { + if (file.isDirectory()) { + File found = findFirstXmlFile(file) + if (found != null) { + return found + } + } + } + + return null + } + + // Tests for FLAT output style (default, backwards compatible) + + @Test + void testFlatModeCreatesEncodedFilename() { + // Given: a page with a nested path + SinglePageResults pageWithPath = new SinglePageResults( + "about.html", + "docs/guide/about.html", + "About Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(pageWithPath) + + // When: we generate the report in FLAT mode (explicit) + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.FLAT) + .reportPageSummary(pageWithPath) + + // Then: the file should be created in the root with encoded path + File[] files = outputPath.listFiles() + assertEquals("Should have exactly one file in root", 1, files.length) + assertTrue("Filename should contain encoded path", + files[0].name.contains("docs") && files[0].name.contains("guide")) + assertTrue("Filename should start with TEST-unit-html-", files[0].name.startsWith("TEST-unit-html-")) + + def testsuite = new XmlSlurper().parse(files[0]) + assertEquals("docs/guide/about.html", testsuite.@name.text()) + } + + @Test + void 
testFlatModeIsDefaultWhenNotSpecified() { + // Given: a page with a nested path + SinglePageResults pageWithPath = new SinglePageResults( + "about.html", + "docs/guide/about.html", + "About Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(pageWithPath) + + // When: we generate the report WITHOUT specifying mode (should default to FLAT) + new JUnitXmlReporter(runResults, outputPath.absolutePath) + .reportPageSummary(pageWithPath) + + // Then: the file should be created in the root with encoded path (FLAT behavior) + File[] files = outputPath.listFiles() + assertEquals("Should have exactly one file in root", 1, files.length) + assertTrue("Filename should contain encoded path", + files[0].name.contains("docs") && files[0].name.contains("guide")) + + def testsuite = new XmlSlurper().parse(files[0]) + assertEquals("docs/guide/about.html", testsuite.@name.text()) + } + + // Tests for hierarchical directory structure (issue #405) + + @Test(expected = RuntimeException.class) + void testHierarchicalModeFailsWhenCannotCreateDirectory() { + // Given: an output path that's a file (not a directory) + File tempFile = File.createTempFile("test", ".txt") + tempFile.deleteOnExit() + + SinglePageResults pageWithPath = new SinglePageResults( + "about.html", + "docs/guide/about.html", + "About Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(pageWithPath) + + // When: we try to generate a report in HIERARCHICAL mode with a file as output path + // Then: it should throw RuntimeException because it cannot create subdirectories + new JUnitXmlReporter(runResults, tempFile.getAbsolutePath(), Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(pageWithPath) + } + + @Test + void testSimpleFilenameCreatesFileInRootDirectory() { + // Given: a page with a simple filename (no directory path) + SinglePageResults singlePageResultsWithSimplePath = new 
SinglePageResults( + "index.html", + "index.html", + "Home Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(singlePageResultsWithSimplePath) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(singlePageResultsWithSimplePath) + + // Then: the test file should be created directly in the output directory + File expectedFile = new File(outputPath, "TEST-index.html.xml") + assertTrue("Expected file in root: ${expectedFile.absolutePath}", expectedFile.exists()) + + def testsuite = new XmlSlurper().parse(expectedFile) + assertEquals("index.html", testsuite.@name.text()) + } + + @Test + void testSingleLevelDirectoryCreatesSubdirectory() { + // Given: a page with a single-level directory path + SinglePageResults singlePageResultsWithPath = new SinglePageResults( + "about.html", + "docs/about.html", + "About Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(singlePageResultsWithPath) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(singlePageResultsWithPath) + + // Then: the test file should be created in a subdirectory + File expectedDir = new File(outputPath, "docs") + File expectedFile = new File(expectedDir, "TEST-about.html.xml") + assertTrue("Expected directory to exist: ${expectedDir.absolutePath}", expectedDir.exists()) + assertTrue("Expected file to exist: ${expectedFile.absolutePath}", expectedFile.exists()) + + def testsuite = new XmlSlurper().parse(expectedFile) + assertEquals("docs/about.html", testsuite.@name.text()) + } + + @Test + void testDeepNestedDirectoryCreatesFullHierarchy() { + // Given: a page with a deeply nested directory path + String deepPath = 
"docs/guide/user/installation/linux.html" + SinglePageResults singlePageResultsWithDeepPath = new SinglePageResults( + "linux.html", + deepPath, + "Linux Installation Guide", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(singlePageResultsWithDeepPath) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(singlePageResultsWithDeepPath) + + // Then: the full directory hierarchy should be created + File expectedDir = new File(outputPath, "docs/guide/user/installation") + File expectedFile = new File(expectedDir, "TEST-linux.html.xml") + assertTrue("Expected directory hierarchy to exist: ${expectedDir.absolutePath}", expectedDir.exists()) + assertTrue("Expected file to exist: ${expectedFile.absolutePath}", expectedFile.exists()) + + def testsuite = new XmlSlurper().parse(expectedFile) + assertEquals(deepPath, testsuite.@name.text()) + } + + @Test + void testVeryLongPathDoesNotExceedFilenameLimit() { + // Given: a page with a very long path (reproducing issue #405) + // This creates a path longer than 255 characters when flattened to a single filename + String longPath = "very/long/path/with/many/nested/directories/that/would/exceed/filesystem/limits/" + + "if/flattened/into/a/single/filename/this/is/a/test/case/for/issue/405/" + + "more/directories/to/make/it/really/long/and/problematic/for/flat/structure/" + + "final/level/index.html" + + SinglePageResults singlePageResultsWithLongPath = new SinglePageResults( + "index.html", + longPath, + "Deep Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(singlePageResultsWithLongPath) + + // When: we generate the report in HIERARCHICAL mode (should not throw exception) + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + 
.reportPageSummary(singlePageResultsWithLongPath) + + // Then: the file should be created successfully with proper directory structure + File parentPath = new File(longPath).parentFile + File expectedDir = new File(outputPath, parentPath.path) + File expectedFile = new File(expectedDir, "TEST-index.html.xml") + assertTrue("Expected directory hierarchy to exist: ${expectedDir.absolutePath}", expectedDir.exists()) + assertTrue("Expected file to exist: ${expectedFile.absolutePath}", expectedFile.exists()) + + // Verify the filename itself is short + assertTrue("Filename should be short", expectedFile.name.length() < 50) + + def testsuite = new XmlSlurper().parse(expectedFile) + assertEquals(longPath, testsuite.@name.text()) + } + + @Test + void testMultiplePagesCreateSeparateDirectories() { + // Given: multiple pages in different directories + SinglePageResults page1 = new SinglePageResults( + "index.html", + "docs/api/index.html", + "API Index", + 1000, + new ArrayList<>()) + SinglePageResults page2 = new SinglePageResults( + "index.html", + "docs/guide/index.html", + "Guide Index", + 1000, + new ArrayList<>()) + + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(page1) + runResults.addPageResults(page2) + + JUnitXmlReporter reporter = new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + + // When: we generate reports for both pages in HIERARCHICAL mode + reporter.reportPageSummary(page1) + reporter.reportPageSummary(page2) + + // Then: separate directory structures should be created + File apiDir = new File(outputPath, "docs/api") + File guideDir = new File(outputPath, "docs/guide") + File apiFile = new File(apiDir, "TEST-index.html.xml") + File guideFile = new File(guideDir, "TEST-index.html.xml") + + assertTrue("API directory should exist", apiDir.exists()) + assertTrue("Guide directory should exist", guideDir.exists()) + assertTrue("API test file should exist", apiFile.exists()) + 
assertTrue("Guide test file should exist", guideFile.exists()) + + // Verify content of both files + def apiTestsuite = new XmlSlurper().parse(apiFile) + assertEquals("docs/api/index.html", apiTestsuite.@name.text()) + + def guideTestsuite = new XmlSlurper().parse(guideFile) + assertEquals("docs/guide/index.html", guideTestsuite.@name.text()) + } + + @Test + void testFilenameWithSpecialCharactersIsSanitized() { + // Given: a filename with special characters + SinglePageResults pageWithSpecialChars = new SinglePageResults( + "my file (2024).html", + "docs/my file (2024).html", + "Special Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(pageWithSpecialChars) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(pageWithSpecialChars) + + // Then: the filename should be sanitized but directory structure preserved + File expectedDir = new File(outputPath, "docs") + assertTrue("Directory should exist", expectedDir.exists()) + + // Find the generated file (name will be sanitized) + File[] files = expectedDir.listFiles() + assertTrue("Should have exactly one file", files != null && files.length == 1) + assertTrue("Filename should start with TEST-", files[0].name.startsWith("TEST-")) + assertTrue("Filename should be sanitized (no parentheses or spaces)", + !files[0].name.contains("(") && !files[0].name.contains(")")) + + def testsuite = new XmlSlurper().parse(files[0]) + assertEquals("docs/my file (2024).html", testsuite.@name.text()) + } + + @Test + void testRelativePathWithDotDotIsHandledCorrectly() { + // Given: a page with relative path containing .. 
(parent directory reference) + // Note: This tests edge case handling - in practice, paths should be normalized + SinglePageResults pageWithRelativePath = new SinglePageResults( + "index.html", + "docs/../public/index.html", + "Relative Path Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(pageWithRelativePath) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(pageWithRelativePath) + + // Then: the file should be created (path handling depends on implementation) + // The implementation should handle this gracefully + File[] allFiles = outputPath.listFiles() + assertTrue("Should have created at least one file or directory", allFiles != null && allFiles.length > 0) + } + + @Test + void testPathTraversalAttackIsBlocked() { + // Given: a malicious path trying to escape the output directory + // This simulates a path traversal attack like "../../../etc/passwd" + SinglePageResults maliciousPage = new SinglePageResults( + "index.html", + "../../../malicious/path/index.html", + "Malicious Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(maliciousPage) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(maliciousPage) + + // Then: the file should be created safely within outputPath, not outside it + File[] allFiles = outputPath.listFiles() + assertTrue("Should have created file or directory", allFiles != null && allFiles.length > 0) + + // Verify no files were created outside outputPath + def outputPathCanonical = outputPath.canonicalPath + def createdFile = findFirstXmlFile(outputPath) + assertNotNull(createdFile) + + // The created file should be within outputPath + assertTrue("File 
should be within output directory", + createdFile.canonicalPath.startsWith(outputPathCanonical)) + } + + @Test + void testPathTraversalWithSymlinkStyleAttackIsBlocked() { + // Given: a more sophisticated path traversal attack that tries to bypass simple checks + // Example: "validdir/../../escape/test.html" which could bypass startsWith() on strings + SinglePageResults sophisticatedAttack = new SinglePageResults( + "test.html", + "valid/../../../escape/test.html", + "Sophisticated Attack", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(sophisticatedAttack) + + // When: we generate the report in HIERARCHICAL mode + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(sophisticatedAttack) + + // Then: verify the file is safely contained + def outputPathCanonical = outputPath.canonicalPath + def createdFile = findFirstXmlFile(outputPath) + assertNotNull(createdFile) + + // Use NIO Path API to verify containment (same method as production code) + def normalizedOutputPath = outputPath.canonicalFile.toPath().normalize() + def normalizedCreatedPath = createdFile.canonicalFile.toPath().normalize() + + assertTrue("File should be within output directory using NIO Path API", + normalizedCreatedPath.startsWith(normalizedOutputPath)) + } + + @Test + void testEnhancedErrorMessageWhenDirectoryCreationFails() { + // Given: a non-existent parent directory that cannot be created + // We'll use a path that's invalid on the filesystem + File invalidOutputPath = new File("/nonexistent/deeply/nested/path/that/cannot/be/created") + + SinglePageResults page = new SinglePageResults( + "test.html", + "some/deep/path/test.html", + "Test Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(page) + + // When/Then: directory creation should fail with enhanced error message + try { + new 
JUnitXmlReporter(runResults, invalidOutputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(page) + fail("Should have thrown RuntimeException for directory creation failure") + } catch (RuntimeException e) { + // Verify the error message contains diagnostic information + String errorMsg = e.message + assertTrue("Error message should mention 'Cannot create directory'", + errorMsg.contains("Cannot create directory")) + assertTrue("Error message should contain full path", + errorMsg.contains(invalidOutputPath.absolutePath)) + assertTrue("Error message should include 'exists:' diagnostic", + errorMsg.contains("exists:")) + assertTrue("Error message should include 'parent canWrite:' diagnostic", + errorMsg.contains("parent canWrite:")) + } + } + + @Test + void testEnhancedErrorMessageFormatIsCorrect() { + // Given: setup that will trigger directory creation failure + File readOnlyParent = new File(outputPath, "readonly-parent") + readOnlyParent.mkdirs() + + // Try to make it read-only (this may not work on all platforms, especially Windows) + boolean madeReadOnly = readOnlyParent.setReadOnly() + + if (!madeReadOnly || readOnlyParent.canWrite()) { + // Skip test if we cannot make directory read-only on this platform + System.err.println("Skipping testEnhancedErrorMessageFormatIsCorrect - cannot make directory read-only on this platform") + return + } + + try { + SinglePageResults page = new SinglePageResults( + "test.html", + "readonly-parent/subdir/test.html", + "Test Page", + 1000, + new ArrayList<>()) + PerRunResults runResults = new PerRunResults() + runResults.addPageResults(page) + + // When: attempting to create subdirectory in read-only parent + new JUnitXmlReporter(runResults, outputPath.absolutePath, Configuration.JunitOutputStyle.HIERARCHICAL) + .reportPageSummary(page) + fail("Should have thrown RuntimeException") + } catch (RuntimeException e) { + // Then: error message should have proper format with parentheses + String 
errorMsg = e.message + assertTrue("Error message should contain opening parenthesis", + errorMsg.contains("(")) + assertTrue("Error message should contain closing parenthesis", + errorMsg.contains(")")) + // Should have format like: "... (exists: true, parent canWrite: false)" + assertTrue("Error message should match expected format pattern", + errorMsg.matches(".*\\(exists: .*, parent canWrite: .*\\).*")) + } finally { + // Cleanup: restore write permission + readOnlyParent.setWritable(true) + } + } } diff --git a/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy b/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy index bdead7ac..ade71ae6 100644 --- a/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy +++ b/htmlSanityCheck-gradle-plugin/src/main/groovy/org/aim42/htmlsanitycheck/gradle/HtmlSanityCheckTask.groovy @@ -86,6 +86,11 @@ class HtmlSanityCheckTask extends DefaultTask { @Input Set excludes + // JUnit output style: FLAT (default, backwards compatible) or HIERARCHICAL (mirrors source structure) + @Optional + @Input + Configuration.JunitOutputStyle junitOutputStyle + @Input List> checkerClasses = AllCheckers.CHECKER_CLASSES @@ -194,6 +199,7 @@ See ${checkingResultsDir} for a detailed report.""" .checksToExecute(checkerClasses) .excludes(excludes.stream().map(Pattern::compile).collect(Collectors.toSet())) + .junitOutputStyle(junitOutputStyle) .build() // in case we have configured specific interpretations of http status codes diff --git a/htmlSanityCheck-maven-plugin/src/main/java/org/aim42/htmlsanitycheck/maven/HtmlSanityCheckMojo.java b/htmlSanityCheck-maven-plugin/src/main/java/org/aim42/htmlsanitycheck/maven/HtmlSanityCheckMojo.java index 4461d283..fa7deddc 100644 --- a/htmlSanityCheck-maven-plugin/src/main/java/org/aim42/htmlsanitycheck/maven/HtmlSanityCheckMojo.java +++ 
b/htmlSanityCheck-maven-plugin/src/main/java/org/aim42/htmlsanitycheck/maven/HtmlSanityCheckMojo.java @@ -200,6 +200,18 @@ public class HtmlSanityCheckMojo extends AbstractMojo { @Parameter private Set excludes = new HashSet<>(); + /** + * (optional) + * JUnit output style: FLAT (all files in one directory, default for backwards compatibility) + * or HIERARCHICAL (subdirectories mirror source structure, solves filename length issues). + *

+ * Type: JunitOutputStyle (FLAT or HIERARCHICAL). + *

+ * Default: FLAT. + */ + @Parameter + private Configuration.JunitOutputStyle junitOutputStyle; + static PerRunResults performChecks(Configuration myConfig) throws MojoExecutionException { try { AllChecksRunner allChecksRunner = new AllChecksRunner(myConfig); @@ -286,6 +298,7 @@ protected Configuration setupConfiguration() { .ignoreIPAddresses(ignoreIPAddresses) .checksToExecute(checkerClasses) + .junitOutputStyle(junitOutputStyle) .build(); // in case we have configured specific interpretations of http status codes diff --git a/src/docs/development/_includes/issue-405.adoc b/src/docs/development/_includes/issue-405.adoc new file mode 100644 index 00000000..241fa8a6 --- /dev/null +++ b/src/docs/development/_includes/issue-405.adoc @@ -0,0 +1,146 @@ +:filename: development/issue-405.adoc +include::../../_common.adoc[] + +== {issue-closed} "File name too long" error with deep paths in JUnit reports (405) + +=== Problem + +https://github.com/aim42/htmlSanityCheck/issues/405[Issue 405] reports that when using the htmlSanityCheck Gradle plugin within a subproject with deeply nested directory structures, a "File name too long" error occurs during the generation of JUnit XML reports. + +The error happens because the generated JUnit report filenames incorporate both the full path to the Gradle subproject and the deeply nested folder structure of the files being checked. +This results in filenames that exceed the filesystem's maximum filename length limit (typically 255 characters). 
+ +Example error: +[source,text] +---- +Caused by: java.io.FileNotFoundException: +/home/xxxxx/.../documentation/build/test-results/htmlchecks/ +TEST-unit-html-_xxxx_xxxxx_xxx_xxxxxxxx_xxxxxxxxxx_xxxxxxxxxxxx_ +xxxxxxxxxxxxxxxxxxxxx_documentation_build_test-results_htmlchecks_ +xxxxxxxxxxxxxxx_xx_xxxxx_xxxxxxxx_xxx_xxxx_xxxxxxxxx_xx_xxxx_xx_ +xx_xxxxxxxxxxxxx_xx_xxxxxx_xxxxxxxxxxxxx_xx_xxxxxxxx_xxxxxxxxxx_ +xxxx.xml (File name too long) +---- + +=== Background + +The original `JUnitXmlReporter` implementation used a flat file structure where all JUnit XML reports were stored in a single directory. The filename was constructed by: + +. Taking the full file path of the checked HTML file +. Sanitizing it by replacing all non-alphanumeric characters with underscores +. Prepending `TEST-unit-html-` to create the final filename + +This approach worked well for shallow directory structures but failed when: + +* Working in deeply nested Gradle subprojects +* Checking HTML files that are themselves in deep directory structures +* The combined path length exceeded OS filename limits (~255 characters) + +=== Solution + +A new configuration option `junitOutputStyle` allows choosing between two output structures for JUnit XML reports: + +FLAT (default):: All JUnit XML reports are stored in a single directory with the entire file path encoded into the filename using underscores. This maintains backward compatibility with existing configurations. ++ +[source,text] +---- +build/test-results/htmlchecks/ + └── TEST-unit-html-_docs_guide_user_installation_linux_html.xml +---- + +HIERARCHICAL:: Creates a hierarchical directory structure that mirrors the source file organization, solving the filename length issue. 
++ +[source,text] +---- +build/test-results/htmlchecks/ + └── docs/ + └── guide/ + └── user/ + └── installation/ + └── TEST-linux.html.xml ✅ +---- + +The HIERARCHICAL approach provides several benefits: + +Solves the filename length issue:: Individual filenames stay well under the 255-character filesystem limit +Intuitive organization:: Directory structure mirrors the checked HTML files' structure, making results easy to find +Maintains all information:: Full path information is preserved through the directory hierarchy +Robust error handling:: Handles edge cases like special characters, relative paths, and path traversal attempts + +=== Configuration + +To enable the hierarchical output structure, set `junitOutputStyle` to `HIERARCHICAL` in your build configuration: + +.Gradle +[source,groovy] +---- +htmlSanityCheck { + junitOutputStyle = org.aim42.htmlsanitycheck.Configuration.JunitOutputStyle.HIERARCHICAL +} +---- + +.Maven +[source,xml] +---- +<plugin> + <groupId>org.aim42.htmlSanityCheck</groupId> + <artifactId>htmlSanityCheck-maven-plugin</artifactId> + <configuration> + <junitOutputStyle>HIERARCHICAL</junitOutputStyle> + </configuration> +</plugin> +---- + +.CLI +[source,bash] +---- +hsc --junitOutputStyle HIERARCHICAL /path/to/html/files +---- + +NOTE: The default value is `FLAT` for backward compatibility. Existing users will see no change unless they explicitly configure `HIERARCHICAL` mode. + +=== Implementation + +The solution was implemented in `JUnitXmlReporter.java` with the following components: + +. A new `JunitOutputStyle` enum (nested in `Configuration` class) defining `FLAT` and `HIERARCHICAL` modes +. An `outputStyle` field to store the configuration (defaults to `FLAT`) +. Modified `reportPageSummary()` method to select the appropriate output strategy: ++ +[source,java] +---- +include::../../../../htmlSanityCheck-core/src/main/java/org/aim42/htmlsanitycheck/report/JUnitXmlReporter.java[tags=reportPageSummary,indent=0] +---- + +. 
Two separate methods implement each strategy: + * `getFlatOutputFile()` - Original flat structure implementation + * `getHierarchicalOutputFile()` - New hierarchical structure implementation + +Key implementation details for HIERARCHICAL mode: + +Path normalization:: Uses `getCanonicalFile()` to handle relative paths with `..` references +Security check:: Verifies that normalized paths don't escape the output directory +Graceful fallback:: Falls back to the output root if path resolution fails +Filename sanitization:: Only sanitizes the filename component, not the entire path + +The configuration option is exposed in: + +* `Configuration` class - `junitOutputStyle` field with `FLAT` default +* Gradle plugin (`HtmlSanityCheckTask`) - `junitOutputStyle` input property +* Maven plugin (`HtmlSanityCheckMojo`) - `junitOutputStyle` parameter +* CLI (`HscCommand`) - `--junitOutputStyle` / `-o` command-line option + +=== Testing + +The implementation includes comprehensive test coverage with 7 new tests in `JUnitXmlReporterTest` that explicitly test HIERARCHICAL mode: + +`testSimpleFilenameCreatesFileInRootDirectory`:: Verifies files with no directory path are created in the root +`testSingleLevelDirectoryCreatesSubdirectory`:: Tests single-level directory creation +`testDeepNestedDirectoryCreatesFullHierarchy`:: Tests deeply nested directories (4+ levels) +`testVeryLongPathDoesNotExceedFilenameLimit`:: Reproduces and fixes the issue #405 with very long paths +`testMultiplePagesCreateSeparateDirectories`:: Verifies multiple pages create separate directory structures +`testFilenameWithSpecialCharactersIsSanitized`:: Tests filename sanitization while preserving the directory structure +`testRelativePathWithDotDotIsHandledCorrectly`:: Tests edge case handling of relative paths + +All existing tests continue to pass with FLAT mode (the default), ensuring backward compatibility. +The full test suite of 379+ tests validates that existing functionality is preserved. 
diff --git a/src/docs/development/design-discussions.adoc b/src/docs/development/design-discussions.adoc index 05a2ac55..1f1e2ee8 100644 --- a/src/docs/development/design-discussions.adoc +++ b/src/docs/development/design-discussions.adoc @@ -19,5 +19,6 @@ include::_includes/issue-252.adoc[leveloffset=+2] === Resolved Issues +include::_includes/issue-405.adoc[leveloffset=+2] include::_includes/issue-244.adoc[leveloffset=+2] include::_includes/issue-190.adoc[leveloffset=+2]