Merge pull request #3838 from freeok/v5-dev

feat: Add cleanEmptyTag method for HtmlUtil
This commit is contained in:
Golden Looly 2025-01-10 09:54:18 +08:00 committed by GitHub
commit e097df5bdb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 0 deletions

View File

@ -24,6 +24,7 @@ public class HtmlUtil {
/** Alias of {@code StrUtil.HTML_GT}, re-exported here for HTML handling code. */
public static final String GT = StrUtil.HTML_GT;
/**
 * Regex matching a single HTML tag. Three alternatives are tried in order:
 * any {@code <...>} span containing no further {@code <}; a closing tag that may have
 * whitespace before the {@code /}; and a self-closing tag that may have whitespace
 * between the {@code /} and the {@code >}.
 */
public static final String RE_HTML_MARK = "(<[^<]*?>)|(<[\\s]*?/[^<]*?>)|(<[^<]*?/[\\s]*?>)";
/**
 * Regex matching an empty HTML element: an opening tag (group 1 = tag name, group 2 =
 * the remainder of the opening tag, e.g. attributes), optional whitespace, and then the
 * closing tag with the same name (back-reference to group 1).
 * <p>
 * The tag name is captured with a possessive quantifier ({@code \w++}) so the engine
 * cannot backtrack to a prefix of the name. With a plain greedy {@code \w+},
 * a mismatched pair such as {@code <span></s>} would be accepted by capturing
 * only {@code s} and treating {@code pan} as attribute text.
 */
public static final String RE_HTML_EMPTY_MARK = "<(\\w++)([^>]*)>\\s*</\\1>";
/**
 * Regex matching a script element, from the opening {@code script} tag (with any
 * attributes) lazily through to the closing {@code script} tag; whitespace is tolerated
 * after {@code <}, around {@code /} and before the final {@code >}.
 * <p>
 * NOTE(review): the pattern string itself carries no flags, so case-insensitivity and
 * matching across line breaks depend on how callers compile it - confirm at the usage
 * sites.
 */
public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
// 256-slot table of char arrays; it is populated and consumed outside this excerpt.
private static final char[][] TEXT = new char[256][];
@ -86,6 +87,17 @@ public class HtmlUtil {
return content.replaceAll(RE_HTML_MARK, "");
}
/**
 * Removes empty HTML elements from the given text, for example
 * {@code <p></p>} or {@code <div>  </div>}.
 * <p>
 * An element counts as empty when its opening tag is followed only by optional
 * whitespace and then by its matching closing tag, as described by
 * {@link #RE_HTML_EMPTY_MARK}. The replacement is a single pass: an element that only
 * becomes empty once an inner empty element has been removed (e.g.
 * {@code <div><p></p></div>}) is left behind as {@code <div></div>}.
 *
 * @param content text to clean; may be {@code null}
 * @return the text with empty elements removed; {@code null} or an empty string is
 *         returned unchanged
 */
public static String cleanEmptyTag(String content) {
	// Guard against null (previously a NullPointerException) and skip the regex
	// engine entirely when there is nothing to scan.
	if (content == null || content.isEmpty()) {
		return content;
	}
	return content.replaceAll(RE_HTML_EMPTY_MARK, "");
}
/**
* 清除指定HTML标签和被标签包围的内容<br>
* 不区分大小写

View File

@ -77,6 +77,29 @@ public class HtmlUtilTest {
assertEquals("pre\r\n\t\tdfdsfdsfdsf\r\nBBBB", result);
}
@Test
public void cleanEmptyTag() {
	// Input made up solely of empty elements: nothing is left.
	assertEquals("", HtmlUtil.cleanEmptyTag("<p></p><div></div>"));

	// Element with text first, empty element second: only the empty one is dropped.
	assertEquals("<p>TEXT</p>", HtmlUtil.cleanEmptyTag("<p>TEXT</p><div></div>"));

	// Empty element first, element with text second.
	assertEquals("<div>TEXT</div>", HtmlUtil.cleanEmptyTag("<p></p><div>TEXT</div>"));

	// No empty elements at all: the input comes back untouched.
	final String allFilled = "<p>TEXT</p><div>TEXT</div>";
	assertEquals("<p>TEXT</p><div>TEXT</div>", HtmlUtil.cleanEmptyTag(allFilled));

	// Empty elements surrounded by plain text: the surrounding text is preserved.
	assertEquals("TEXTTEXT", HtmlUtil.cleanEmptyTag("TEXT<p></p><div></div>TEXT"));
}
@Test
public void unwrapHtmlTagTest() {
//非闭合标签