enhance HtmlUtil.removeHtmlAttr

This commit is contained in:
Looly 2019-09-28 00:07:30 +08:00
parent 9eb00756f8
commit 32955b9c8c
4 changed files with 33 additions and 12 deletions

View File

@ -9,6 +9,7 @@
* 【core】 ArrayUtil.isEmpty可变长参数改为数组（issue#555@Github）
* 【core】 新增Convert.toMap方法（issue#I12ISI@Gitee）
* 【aop 】 增加返回值获取支持，优化逻辑和接口（pr#561@Github）
* 【aop 】 改进HtmlUtil.removeHtmlAttr（issue#556@Github）
### Bug修复
* 【extra】 修复Mail中sslEnable无效问题（pr#74@Gitee）

View File

@ -30,11 +30,11 @@ public abstract class AbstractCaptcha implements ICaptcha {
private static final long serialVersionUID = 3180820918087507254L;
/** 图片的宽度 */
protected int width = 100;
protected int width;
/** 图片的高度 */
protected int height = 37;
protected int height;
/** 验证码干扰元素个数 */
protected int interfereCount = 15;
protected int interfereCount;
/** 字体 */
protected Font font;
/** 验证码 */

View File

@ -55,7 +55,7 @@ public class HtmlUtil {
* @return 转义后的文本
*/
public static String escape(String text) {
return encode(text, TEXT);
return encode(text);
}
/**
@ -118,7 +118,7 @@ public class HtmlUtil {
* @return 去除标签后的文本
*/
public static String removeHtmlTag(String content, boolean withTagContent, String... tagNames) {
String regex = null;
String regex;
for (String tagName : tagNames) {
if (StrUtil.isBlank(tagName)) {
continue;
@ -146,10 +146,14 @@ public class HtmlUtil {
* @return 处理后的文本
*/
public static String removeHtmlAttr(String content, String... attrs) {
String regex = null;
String regex;
for (String attr : attrs) {
// (?i)表示忽略大小写
regex = StrUtil.format("(?i)\\s*{}=([\"']).*?\\1", attr);
// (?i) 表示忽略大小写
// \s* 属性名前后的空白符去除
// [^>]+? 属性值，至少有一个非>的字符，>表示标签结束
// \s+(?=>) 表示属性值后跟空格加>，即末尾的属性，此时去掉空格
// (?=\s|>) 表示属性值后跟空格（属性后还有别的属性）或者跟>（最后一个属性）
regex = StrUtil.format("(?i)(\\s*{}\\s*=[^>]+?\\s+(?=>))|(\\s*{}\\s*=[^>]+?(?=\\s|>))", attr, attr);
content = content.replaceAll(regex, StrUtil.EMPTY);
}
return content;
@ -163,7 +167,7 @@ public class HtmlUtil {
* @return 处理后的文本
*/
public static String removeAllHtmlAttr(String content, String... tagNames) {
String regex = null;
String regex;
for (String tagName : tagNames) {
regex = StrUtil.format("(?i)<{}[^>]*?>", tagName);
content = content.replaceAll(regex, StrUtil.format("<{}>", tagName));
@ -175,10 +179,9 @@ public class HtmlUtil {
* Encoder
*
* @param text 被编码的文本
* @param array 特殊字符集合
* @return 编码后的字符
*/
private static String encode(String text, char[][] array) {
private static String encode(String text) {
int len;
if ((text == null) || ((len = text.length()) == 0)) {
return StrUtil.EMPTY;
@ -188,7 +191,7 @@ public class HtmlUtil {
for (int i = 0; i < len; i++) {
c = text.charAt(i);
if (c < 64) {
buffer.append(array[c]);
buffer.append(TEXT[c]);
} else {
buffer.append(c);
}

View File

@ -130,9 +130,26 @@ public class HtmlUtilTest {
/**
 * Checks {@code HtmlUtil.removeHtmlAttr} when asked to remove the {@code class} attribute.
 * <p>
 * Four inputs are covered: double-quoted values; unquoted and single-quoted values
 * (including a trailing space before {@code >}); the attribute placed after or before
 * another attribute; and whitespace around the {@code =} sign. Several inputs spell the
 * attribute as {@code Class} and still expect it to be removed.
 */
@Test
public void removeHtmlAttrTest() {
// Removed attribute's value is double-quoted
String html = "<div class=\"test_div\"></div><span class=\"test_div\"></span>";
String result = HtmlUtil.removeHtmlAttr(html, "class");
Assert.assertEquals("<div></div><span></span>", result);
// Removed attribute is unquoted, or single-quoted and followed by a space before '>'
html = "<div class=test_div></div><span Class='test_div' ></span>";
result = HtmlUtil.removeHtmlAttr(html, "class");
Assert.assertEquals("<div></div><span></span>", result);
// Removed attribute is the last one in the tag, or comes before another attribute;
// the other attribute (style / width) must be left untouched
html = "<div style=\"margin:100%\" class=test_div></div><span Class='test_div' width=100></span>";
result = HtmlUtil.removeHtmlAttr(html, "class");
Assert.assertEquals("<div style=\"margin:100%\"></div><span width=100></span>", result);
// Whitespace between the removed attribute's name, the '=' sign and its value;
// the expected output keeps "style = ..." exactly as written
html = "<div style = \"margin:100%\" class = test_div></div><span Class = 'test_div' width=100></span>";
result = HtmlUtil.removeHtmlAttr(html, "class");
Assert.assertEquals("<div style = \"margin:100%\"></div><span width=100></span>", result);
}
@Test