From dc13edd66336ded296a339c982f797d535685bdf Mon Sep 17 00:00:00 2001
From: freeok <51998152+freeok@users.noreply.github.com>
Date: Fri, 10 Jan 2025 08:19:51 +0800
Subject: [PATCH] feat: Add cleanEmptyTag method for HtmlUtil
---
.../main/java/cn/hutool/http/HtmlUtil.java | 12 ++++++++++
.../java/cn/hutool/http/HtmlUtilTest.java | 23 +++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java b/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java
index 94d429a18..9e08e360a 100755
--- a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java
+++ b/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java
@@ -24,6 +24,7 @@ public class HtmlUtil {
public static final String GT = StrUtil.HTML_GT;
public static final String RE_HTML_MARK = "(<[^<]*?>)|(<[\\s]*?/[^<]*?>)|(<[^<]*?/[\\s]*?>)";
+ public static final String RE_HTML_EMPTY_MARK = "<(\\w+)([^>]*)>\\s*</\\1>"; // empty element: opening tag (name captured as group 1, optional attributes), optional whitespace, then the closing tag with the same name
public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
private static final char[][] TEXT = new char[256][];
@@ -86,6 +87,17 @@ public class HtmlUtil {
return content.replaceAll(RE_HTML_MARK, "");
}
+ /**
+ * Removes every substring of {@code content} that matches {@link #RE_HTML_EMPTY_MARK}.
+ * That pattern is meant to describe empty HTML tags, for example {@code <p></p>}.
+ *
+ * @param content the text to clean; must not be {@code null}, since {@code replaceAll} is invoked on it directly
+ * @return the text with the matched empty tags removed
+ */
+ public static String cleanEmptyTag(String content) {
+ return content.replaceAll(RE_HTML_EMPTY_MARK, "");
+ }
+
/**
* 清除指定HTML标签和被标签包围的内容
* 不区分大小写
diff --git a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java
index 6e0d56a59..2c4487768 100644
--- a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java
+++ b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java
@@ -77,6 +77,29 @@ public class HtmlUtilTest {
assertEquals("pre\r\n\t\tdfdsfdsfdsf\r\nBBBB", result);
}
+ @Test
+ public void cleanEmptyTag() {
+ String str = "
TEXT
"; + result = HtmlUtil.cleanEmptyTag(str); + assertEquals("TEXT
", result); + + str = "TEXT
TEXT