diff --git a/jsoup/pom.xml b/jsoup/pom.xml index e5f9bd7ac01a..11f50378ca42 100644 --- a/jsoup/pom.xml +++ b/jsoup/pom.xml @@ -19,10 +19,16 @@ jsoup ${jsoup.version} + + com.googlecode.owasp-java-html-sanitizer + owasp-java-html-sanitizer + ${owasp.version} + 1.17.2 + 20240325.1 \ No newline at end of file diff --git a/jsoup/src/main/java/com/baeldung/jsoup/HTMLSanitizer.java b/jsoup/src/main/java/com/baeldung/jsoup/HTMLSanitizer.java new file mode 100644 index 000000000000..9e889d339e7f --- /dev/null +++ b/jsoup/src/main/java/com/baeldung/jsoup/HTMLSanitizer.java @@ -0,0 +1,45 @@ +package com.baeldung.jsoup; + +import org.jsoup.Jsoup; +import org.jsoup.safety.Safelist; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; +import org.owasp.html.Sanitizers; + +public class HTMLSanitizer { + + private static final PolicyFactory POLICY = Sanitizers.FORMATTING.and(Sanitizers.LINKS); + private static final PolicyFactory HTML_POLICY = new HtmlPolicyBuilder().allowCommonBlockElements() + .allowCommonInlineFormattingElements() + .toFactory(); + + private static final PolicyFactory CUSTOM_POLICY = new HtmlPolicyBuilder().allowElements("a", "p", "div", "span", "h1", "h2", "h3") + .allowUrlProtocols("https") + .allowAttributes("href") + .onElements("a") + .requireRelNofollowOnLinks() + .allowAttributes("class") + .globally() + .allowStyling() + .toFactory(); + + public static String sanitizeUsingBasic(String htmlContent) { + return POLICY.sanitize(htmlContent); + } + + public static String sanitizeUsingHTMLPolicy(String html) { + return HTML_POLICY.sanitize(html); + } + + public static String sanitizeUsingCustomPolicy(String html) { + return CUSTOM_POLICY.sanitize(html); + } + + public static String sanitizeUsingJsoup(String html) { + Safelist safelist = Safelist.basic() + .addTags("h1", "h2", "h3") + .addAttributes("a", "target") + .addProtocols("a", "href", "http", "https"); + return Jsoup.clean(html, safelist); + } +} diff --git a/jsoup/src/test/java/com/baeldung/jsoup/HTMLSanitizerUnitTest.java b/jsoup/src/test/java/com/baeldung/jsoup/HTMLSanitizerUnitTest.java new file mode 100644 index 000000000000..6f470127d189 --- /dev/null +++ b/jsoup/src/test/java/com/baeldung/jsoup/HTMLSanitizerUnitTest.java @@ -0,0 +1,42 @@ +package com.baeldung.jsoup; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.Test; + +public class HTMLSanitizerUnitTest { + + @Test + void givenScriptAndBasicTags_whenSanitizedWithBasicPolicy_thenStripScriptAndKeepFormatting() { + String input = "Hello link"; + String expectedOutput = "Hello link"; + + String sanitized = HTMLSanitizer.sanitizeUsingBasic(input); + assertEquals(expectedOutput, sanitized); + } + + @Test + void givenStyledHeadingAndUnsafeLink_whenSanitizedWithCustomPolicy_thenAllowOnlySafeContent() { + String input = "

Welcome

" + + "Click" + + ""; + String expectedOutput = "

Welcome

Click"; + String sanitized = HTMLSanitizer.sanitizeUsingCustomPolicy(input); + assertEquals(expectedOutput, sanitized); + } + + @Test + void givenMixedHtml_whenSanitizedWithCustomPolicy_thenApplyCustomRules() { + String input = "
Hello
"; + String expectedOutput = "
Hello
"; + String sanitized = HTMLSanitizer.sanitizeUsingCustomPolicy(input); + assertEquals(expectedOutput, sanitized); + } + + @Test + void givenJavascriptHrefAndTargetAttribute_whenSanitizedWithJsoup_thenOnlyAllowSafeContent() { + String input = "

Title

Click"; + String expectedOutput = "

Title

Click"; + String sanitized = HTMLSanitizer.sanitizeUsingJsoup(input); + assertEquals(expectedOutput, sanitized); + } +}