diff --git a/src/main/java/org/jahia/modules/richtext/configuration/parse/Parser.java b/src/main/java/org/jahia/modules/richtext/configuration/parse/Parser.java index bed2f91..50e6a0a 100644 --- a/src/main/java/org/jahia/modules/richtext/configuration/parse/Parser.java +++ b/src/main/java/org/jahia/modules/richtext/configuration/parse/Parser.java @@ -19,11 +19,14 @@ private enum PolicyType { static { PATTERNS = new HashMap<>(); + String onsiteUrl = "(?:[\\p{L}\\p{N}\\\\\\.\\#@\\$%\\+&;\\-_~,\\?=/!{}:]+|\\#(\\w)+)"; /* on-site links: a run of URL characters (including '{', '}' and ':'), or '#' followed by word characters */ + String offsiteUrl = "(\\s*(?:(?:ht|f)tps?://|mailto:)[\\p{L}\\p{N}][\\p{L}\\p{N}\\p{Zs}\\.\\#@\\$%\\+&;:\\-_~,\\?=/!\\(\\)" + + "]*+\\s*)"; /* off-site links: an http, https, ftp, ftps or mailto URL with optional surrounding whitespace */ PATTERNS.put("NUMBER_OR_PERCENT", Pattern.compile("[0-9]+%?")); - PATTERNS.put("ONSITE_URL", Pattern.compile("(?:[\\p{L}\\p{N}\\\\\\.\\#@\\$%\\+&;\\-_~,\\?=/!]+|\\#(\\w)+)")); + PATTERNS.put("ONSITE_URL", Pattern.compile(onsiteUrl)); + PATTERNS.put("OFFSITE_URL", Pattern.compile(offsiteUrl)); /* must be built from offsiteUrl, not onsiteUrl, so that OFFSITE_URL keeps matching scheme-qualified links */ + PATTERNS.put("LINKS_URL", Pattern.compile(String.format("(?:%s|%s)", onsiteUrl, offsiteUrl))); /* accepts either an on-site or an off-site link */ PATTERNS.put("HTML_ID", Pattern.compile("[a-zA-Z0-9\\:\\-_\\.]+")); - PATTERNS.put("OFFSITE_URL", Pattern.compile("\\s*(?:(?:ht|f)tps?://|mailto:)[\\p{L}\\p{N}]" - + "[\\p{L}\\p{N}\\p{Zs}\\.\\#@\\$%\\+&;:\\-_~,\\?=/!\\(\\)]*+\\s*")); PATTERNS.put("HTML_CLASS", Pattern.compile("[a-zA-Z0-9\\s,\\-_]+")); PATTERNS.put("NUMBER", Pattern.compile("[+-]?(?:(?:[0-9]+(?:\\.[0-9]*)?)|\\.[0-9]+)")); PATTERNS.put("NAME", Pattern.compile("[a-zA-Z0-9\\-_\\$]+")); diff --git a/src/main/resources/META-INF/configuration-default/org.jahia.modules.richtext.config-default.yml b/src/main/resources/META-INF/configuration-default/org.jahia.modules.richtext.config-default.yml index 50d87cc..05afd88 100644 --- a/src/main/resources/META-INF/configuration-default/org.jahia.modules.richtext.config-default.yml +++ b/src/main/resources/META-INF/configuration-default/org.jahia.modules.richtext.config-default.yml @@ -38,7 +38,7 @@ htmlFiltering: pattern: 
NUMBER_OR_PERCENT elements: canvas, img, table, td, th, col, colgroup, video - name: href - pattern: ONSITE_URL + pattern: LINKS_URL elements: a - name: hreflang elements: a diff --git a/tests/cypress/e2e/defaultFiltering.cy.ts b/tests/cypress/e2e/defaultFiltering.cy.ts new file mode 100644 index 0000000..a01afea --- /dev/null +++ b/tests/cypress/e2e/defaultFiltering.cy.ts @@ -0,0 +1,80 @@ +import {addNode, deleteNode} from '@jahia/cypress'; +import { + disableHtmlFiltering, + enableHtmlFiltering, + getContent, + modifyContent +} from '../fixtures/utils'; + +/** + * Test scenarios for default filtering cases + */ +describe('Default HTML filtering', () => { + const siteKey = 'digitall'; + const textName = 'myText'; + const path = `/sites/${siteKey}/contents/${textName}`; + + before(() => { + addNode({ + parentPathOrId: `/sites/${siteKey}/contents`, + primaryNodeType: 'jnt:bigText', + name: textName, + properties: [{name: 'text', value: '

hello there

', language: 'en'}] + }); + enableHtmlFiltering(siteKey); + }); + + after(() => { + disableHtmlFiltering(siteKey); + deleteNode(path); + }); + + it('allows internal links', () => { + // Note that the actual href text being sent over to the sanitizer is '##doc-context##/{workspace}/##ref:link1##' + const text = '

/files/{workspace}/sites/digitall/files/images/pdf/Conference%20Guide.pdf

'; + modifyContent(path, text); + getContent(path).then(result => { + const value = result.data.jcr.nodeByPath.property.value; + expect(value).to.contain('

'); + expect(value).to.contain(' { + const text = '

This is a google link

'; + modifyContent(path, text); + getContent(path).then(result => { + const value = result.data.jcr.nodeByPath.property.value; + expect(value).to.contain('

'); + expect(value).to.contain(' { + const text = '

This is an xss test

'; + modifyContent(path, text); + getContent(path).then(result => { + const value = result.data.jcr.nodeByPath.property.value; + expect(value).to.contain('

'); + expect(value).to.not.contain(' { + const text = '

This is an xss test

'; + modifyContent(path, text); + getContent(path).then(result => { + const value = result.data.jcr.nodeByPath.property.value; + expect(value).to.contain('

'); + expect(value).to.contain('