diff --git a/jsoup/pom.xml b/jsoup/pom.xml
index e5f9bd7ac01a..11f50378ca42 100644
--- a/jsoup/pom.xml
+++ b/jsoup/pom.xml
@@ -19,10 +19,16 @@
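             <artifactId>jsoup</artifactId>
             <version>${jsoup.version}</version>
         </dependency>
+        <dependency>
+            <groupId>com.googlecode.owasp-java-html-sanitizer</groupId>
+            <artifactId>owasp-java-html-sanitizer</artifactId>
+            <version>${owasp.version}</version>
+        </dependency>
     </dependencies>
 
     <properties>
         <jsoup.version>1.17.2</jsoup.version>
+        <owasp.version>20240325.1</owasp.version>
     </properties>
 
 </project>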
\ No newline at end of file
diff --git a/jsoup/src/main/java/com/baeldung/jsoup/HTMLSanitizer.java b/jsoup/src/main/java/com/baeldung/jsoup/HTMLSanitizer.java
new file mode 100644
index 000000000000..9e889d339e7f
--- /dev/null
+++ b/jsoup/src/main/java/com/baeldung/jsoup/HTMLSanitizer.java
@@ -0,0 +1,93 @@
+package com.baeldung.jsoup;
+
+import org.jsoup.Jsoup;
+import org.jsoup.safety.Safelist;
+import org.owasp.html.HtmlPolicyBuilder;
+import org.owasp.html.PolicyFactory;
+import org.owasp.html.Sanitizers;
+
+/**
+ * Static helpers that clean untrusted HTML with the OWASP Java HTML Sanitizer or with jsoup.
+ * Each policy or safelist is built once and held in a constant.
+ */
+public class HTMLSanitizer {
+
+    /** Prepackaged OWASP policies: {@code Sanitizers.FORMATTING} combined with {@code Sanitizers.LINKS}. */
+    private static final PolicyFactory POLICY = Sanitizers.FORMATTING.and(Sanitizers.LINKS);
+
+    /** OWASP policy built from the builder's common block and common inline formatting element sets. */
+    private static final PolicyFactory HTML_POLICY = new HtmlPolicyBuilder()
+        .allowCommonBlockElements()
+        .allowCommonInlineFormattingElements()
+        .toFactory();
+
+    /**
+     * Hand-assembled OWASP policy: a fixed element list, the https URL protocol, {@code href}
+     * on anchors with rel=nofollow required on links, {@code class} on every element, and styling.
+     */
+    private static final PolicyFactory CUSTOM_POLICY = new HtmlPolicyBuilder()
+        .allowElements("a", "p", "div", "span", "h1", "h2", "h3")
+        .allowUrlProtocols("https")
+        .allowAttributes("href")
+        .onElements("a")
+        .requireRelNofollowOnLinks()
+        .allowAttributes("class")
+        .globally()
+        .allowStyling()
+        .toFactory();
+
+    /**
+     * jsoup safelist: {@code Safelist.basic()} plus h1-h3 tags, {@code target} on anchors and
+     * http/https for anchor {@code href} values. Built once and never modified afterwards, so
+     * calls no longer rebuild it each time.
+     */
+    private static final Safelist JSOUP_SAFELIST = Safelist.basic()
+        .addTags("h1", "h2", "h3")
+        .addAttributes("a", "target")
+        .addProtocols("a", "href", "http", "https");
+
+    /**
+     * Sanitizes markup with the prepackaged formatting-and-links policy.
+     *
+     * @param htmlContent untrusted HTML
+     * @return the output of {@code POLICY.sanitize}
+     */
+    public static String sanitizeUsingBasic(String htmlContent) {
+        return POLICY.sanitize(htmlContent);
+    }
+
+    /**
+     * Sanitizes markup with the common block/inline formatting policy.
+     *
+     * @param html untrusted HTML
+     * @return the output of {@code HTML_POLICY.sanitize}
+     */
+    public static String sanitizeUsingHTMLPolicy(String html) {
+        return HTML_POLICY.sanitize(html);
+    }
+
+    /**
+     * Sanitizes markup with the hand-assembled policy.
+     *
+     * @param html untrusted HTML
+     * @return the output of {@code CUSTOM_POLICY.sanitize}
+     */
+    public static String sanitizeUsingCustomPolicy(String html) {
+        return CUSTOM_POLICY.sanitize(html);
+    }
+
+    /**
+     * Sanitizes markup with jsoup and the shared safelist.
+     *
+     * @param html untrusted HTML; {@code null} is answered with an empty string instead of
+     *             being handed to jsoup (NOTE(review): intended to match how the OWASP-backed
+     *             methods treat null - confirm against PolicyFactory.sanitize)
+     * @return the cleaned markup from {@code Jsoup.clean}, or {@code ""} for {@code null}
+     */
+    public static String sanitizeUsingJsoup(String html) {
+        if (html == null) {
+            return "";
+        }
+        return Jsoup.clean(html, JSOUP_SAFELIST);
+    }
+}
diff --git a/jsoup/src/test/java/com/baeldung/jsoup/HTMLSanitizerUnitTest.java b/jsoup/src/test/java/com/baeldung/jsoup/HTMLSanitizerUnitTest.java
new file mode 100644
index 000000000000..6f470127d189
--- /dev/null
+++ b/jsoup/src/test/java/com/baeldung/jsoup/HTMLSanitizerUnitTest.java
@@ -0,0 +1,42 @@
+package com.baeldung.jsoup;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import org.junit.jupiter.api.Test;
+
+public class HTMLSanitizerUnitTest { // Exact-output tests for the static HTMLSanitizer helpers; sanitizeUsingHTMLPolicy is not exercised here.
+ // NOTE(review): the literals below hold no HTML tags and some run across several lines - possibly extraction damage, confirm against the real fixtures.
+ @Test
+ void givenScriptAndBasicTags_whenSanitizedWithBasicPolicy_thenStripScriptAndKeepFormatting() { // Passes input through sanitizeUsingBasic and requires equality with expectedOutput.
+ String input = "Hello link";
+ String expectedOutput = "Hello link";
+
+ String sanitized = HTMLSanitizer.sanitizeUsingBasic(input);
+ assertEquals(expectedOutput, sanitized);
+ }
+
+ @Test
+ void givenStyledHeadingAndUnsafeLink_whenSanitizedWithCustomPolicy_thenAllowOnlySafeContent() { // Passes the concatenated input through sanitizeUsingCustomPolicy and requires equality with expectedOutput.
+ String input = "
Welcome
"
+ + "Click"
+ + "";
+ String expectedOutput = "Welcome
Click";
+ String sanitized = HTMLSanitizer.sanitizeUsingCustomPolicy(input);
+ assertEquals(expectedOutput, sanitized);
+ }
+
+ @Test
+ void givenMixedHtml_whenSanitizedWithCustomPolicy_thenApplyCustomRules() { // Second sanitizeUsingCustomPolicy case, again compared for exact equality.
+ String input = "Hello
";
+ String expectedOutput = "Hello
";
+ String sanitized = HTMLSanitizer.sanitizeUsingCustomPolicy(input);
+ assertEquals(expectedOutput, sanitized);
+ }
+
+ @Test
+ void givenJavascriptHrefAndTargetAttribute_whenSanitizedWithJsoup_thenOnlyAllowSafeContent() { // Passes input through sanitizeUsingJsoup and requires equality with expectedOutput.
+ String input = "Title
Click";
+ String expectedOutput = "Title
Click";
+ String sanitized = HTMLSanitizer.sanitizeUsingJsoup(input);
+ assertEquals(expectedOutput, sanitized);
+ }
+}