From dc13edd66336ded296a339c982f797d535685bdf Mon Sep 17 00:00:00 2001 From: freeok <51998152+freeok@users.noreply.github.com> Date: Fri, 10 Jan 2025 08:19:51 +0800 Subject: [PATCH] feat: Add cleanEmptyTag method for HtmlUtil --- .../main/java/cn/hutool/http/HtmlUtil.java | 12 ++++++++++ .../java/cn/hutool/http/HtmlUtilTest.java | 23 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java b/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java index 94d429a18..9e08e360a 100755 --- a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java +++ b/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java @@ -24,6 +24,7 @@ public class HtmlUtil { public static final String GT = StrUtil.HTML_GT; public static final String RE_HTML_MARK = "(<[^<]*?>)|(<[\\s]*?/[^<]*?>)|(<[^<]*?/[\\s]*?>)"; + public static final String RE_HTML_EMPTY_MARK = "<(\\w+)([^>]*)>\\s*</\\1>"; public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; private static final char[][] TEXT = new char[256][]; @@ -86,6 +87,17 @@ public class HtmlUtil { return content.replaceAll(RE_HTML_MARK, ""); } + /** + * Removes every empty HTML element: an opening tag directly followed by its matching closing tag, with at most whitespace in between.
+ * For example, {@code <p></p>} is removed while {@code <p>TEXT</p>} is kept. + * + * @param content the text to clean + * @return the text with empty elements removed + */ + public static String cleanEmptyTag(String content) { + return content.replaceAll(RE_HTML_EMPTY_MARK, ""); + } + /** * 清除指定HTML标签和被标签包围的内容
* 不区分大小写 diff --git a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java index 6e0d56a59..2c4487768 100644 --- a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java +++ b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java @@ -77,6 +77,29 @@ public class HtmlUtilTest { assertEquals("pre\r\n\t\tdfdsfdsfdsf\r\nBBBB", result); } + @Test + public void cleanEmptyTag() { + String str = "

"; + String result = HtmlUtil.cleanEmptyTag(str); + assertEquals("", result); + + str = "

TEXT

"; + result = HtmlUtil.cleanEmptyTag(str); + assertEquals("

TEXT

", result); + + str = "

TEXT
"; + result = HtmlUtil.cleanEmptyTag(str); + assertEquals("
TEXT
", result); + + str = "

TEXT

TEXT
"; + result = HtmlUtil.cleanEmptyTag(str); + assertEquals("

TEXT

TEXT
", result); + + str = "TEXT

TEXT"; + result = HtmlUtil.cleanEmptyTag(str); + assertEquals("TEXTTEXT", result); + } + @Test public void unwrapHtmlTagTest() { //非闭合标签