feat: Add cleanEmptyTag method for HtmlUtil

This commit is contained in:
freeok 2025-01-10 08:19:51 +08:00
parent b89d4f6a09
commit dc13edd663
2 changed files with 35 additions and 0 deletions

View File

@ -24,6 +24,7 @@ public class HtmlUtil {
/** Alias of {@code StrUtil.HTML_GT} (presumably the escaped form of the greater-than sign; the value is defined in StrUtil). */
public static final String GT = StrUtil.HTML_GT;
/**
 * Regex for HTML tags, built from three alternatives: a generic tag, a closing tag that
 * tolerates whitespace before the slash, and a self-closing tag that tolerates whitespace
 * after the slash. Within this class it is passed to {@code String.replaceAll} to strip tags.
 */
public static final String RE_HTML_MARK = "(<[^<]*?>)|(<[\\s]*?/[^<]*?>)|(<[^<]*?/[\\s]*?>)";
/**
 * Regex for an empty HTML element: an opening tag (group 1 = tag name, group 2 = the rest of
 * the opening tag, e.g. attributes), followed only by optional whitespace, followed by the
 * closing tag with the same name (matched through a back-reference to group 1).
 * The match is case-sensitive unless the caller compiles it with a case-insensitive flag.
 */
public static final String RE_HTML_EMPTY_MARK = "<(\\w+)([^>]*)>\\s*</\\1>";
/**
 * Regex for a script element including its content, tolerating whitespace around the tag
 * name and the closing slash. The content part uses a reluctant "any character" match, so it
 * only spans line terminators when compiled with a DOTALL-style flag.
 */
public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
/**
 * Table with 256 slots of char arrays.
 * NOTE(review): presumably a per-character replacement table indexed by character code and
 * populated elsewhere in this class; confirm against the initializer.
 */
private static final char[][] TEXT = new char[256][];
@ -86,6 +87,17 @@ public class HtmlUtil {
return content.replaceAll(RE_HTML_MARK, "");
}
/**
 * Removes all empty HTML tags from the given text.<br>
 * A tag is empty when its opening and closing tags enclose nothing but whitespace,
 * for example &lt;p&gt;&lt;/p&gt;.<br>
 * Removal is repeated until nothing more matches, so a tag that only becomes empty once its
 * empty children are gone (for example &lt;div&gt;&lt;p&gt;&lt;/p&gt;&lt;/div&gt;) is removed as well.
 *
 * @param content the text to clean
 * @return the text with all empty tags removed
 */
public static String cleanEmptyTag(String content) {
String cleaned = content;
String previous;
do {
previous = cleaned;
cleaned = previous.replaceAll(RE_HTML_EMPTY_MARK, "");
// Every match is replaced by the empty string, so an unchanged length means
// nothing matched in this pass and the text is stable.
} while (cleaned.length() != previous.length());
return cleaned;
}
/**
* 清除指定HTML标签和被标签包围的内容<br>
* 不区分大小写

View File

@ -77,6 +77,29 @@ public class HtmlUtilTest {
assertEquals("pre\r\n\t\tdfdsfdsfdsf\r\nBBBB", result);
}
@Test
public void cleanEmptyTag() {
// Only empty tags: everything is removed.
assertEquals("", HtmlUtil.cleanEmptyTag("<p></p><div></div>"));

// An empty tag after a non-empty one: only the empty tag is removed.
assertEquals("<p>TEXT</p>", HtmlUtil.cleanEmptyTag("<p>TEXT</p><div></div>"));

// An empty tag before a non-empty one: only the empty tag is removed.
assertEquals("<div>TEXT</div>", HtmlUtil.cleanEmptyTag("<p></p><div>TEXT</div>"));

// No empty tags: the input is returned unchanged.
assertEquals("<p>TEXT</p><div>TEXT</div>", HtmlUtil.cleanEmptyTag("<p>TEXT</p><div>TEXT</div>"));

// Empty tags surrounded by plain text: the surrounding text is kept.
assertEquals("TEXTTEXT", HtmlUtil.cleanEmptyTag("TEXT<p></p><div></div>TEXT"));
}
@Test
public void unwrapHtmlTagTest() {
//非闭合标签