修复HtmlUtil的removeHtmlAttr方法匹配问题

This commit is contained in:
Looly 2023-04-25 16:14:22 +08:00
parent 9b8f0a0ed4
commit acb40326f6
3 changed files with 22 additions and 2 deletions

View File

@ -2,7 +2,7 @@
# 🚀Changelog
-------------------------------------------------------------------------------------------------------------
# 5.8.19.M1 (2023-04-22)
# 5.8.19.M1 (2023-04-25)
### 🐣新特性
* 【db 】 优化HttpRequest.toString()内容打印（issue#3072@Github）
@ -11,6 +11,7 @@
* 【core 】 修复URLUtil.decode无法解码UTF-16问题（issue#3063@Github）
* 【db 】 修复insertOrUpdate更新中条件字段没有移除问题（issue#I6W91Z@Gitee）
* 【core 】 修复VIN车架号正则问题（pr#3078@Github）
* 【core 】 修复HtmlUtil的removeHtmlAttr方法匹配问题（issue#I6YNTF@Gitee）
-------------------------------------------------------------------------------------------------------------
# 5.8.18 (2023-04-16)

View File

@ -155,7 +155,15 @@ public class HtmlUtil {
// [^>]+? 属性值，至少有一个非>的字符，>表示标签结束
// \s+(?=>) 表示属性值后跟空格加>，即末尾的属性，此时去掉空格
// (?=\s|>) 表示属性值后跟空格（属性后还有别的属性）或者跟>（最后一个属性）
regex = StrUtil.format("(?i)(\\s*{}\\s*=[^>]+?\\s+(?=>))|(\\s*{}\\s*=[^>]+?(?=\\s|>))", attr, attr);
regex = StrUtil.format("(?i)(\\s*{}\\s*=\\s*)" +
"(" +
// name="xxxx"
"([\"][^\"]+?[\"]\\s*)|" +
// name=xxx >
"([^>]+?\\s+(?=>))|" +
// name=xxx> 或者 name=xxx name2=xxx
"([^>]+?(?=\\s|>))" +
")", attr);
content = content.replaceAll(regex, StrUtil.EMPTY);
}
return content;

View File

@ -181,4 +181,15 @@ public class HtmlUtilTest {
final String result = HtmlUtil.removeAllHtmlAttr(html, "div");
Assert.assertEquals("<div></div>", result);
}
/**
 * Regression test for issue I6YNTF: removing the {@code class} attribute via
 * {@code HtmlUtil.removeHtmlAttr} must yield the same bare {@code <div>} for
 * both a quoted value containing a space and an unquoted value.
 */
@Test
public void issueI6YNTFTest() {
    final String expected = "<html><body><div>hello world</div></body></html>";

    // Quoted attribute value that contains a space.
    final String quotedHtml = "<html><body><div class=\"a1 a2\">hello world</div></body></html>";
    final String quotedResult = HtmlUtil.removeHtmlAttr(quotedHtml, "class");
    Assert.assertEquals(expected, quotedResult);

    // Unquoted attribute value directly followed by the closing '>'.
    final String unquotedHtml = "<html><body><div class=a1>hello world</div></body></html>";
    final String unquotedResult = HtmlUtil.removeHtmlAttr(unquotedHtml, "class");
    Assert.assertEquals(expected, unquotedResult);
}
}