This commit is contained in:
Looly 2022-10-25 10:49:33 +08:00
parent 267e16e9c1
commit b1caf395d9
10 changed files with 173 additions and 40 deletions

View File

@ -0,0 +1,108 @@
package cn.hutool.core.compress;
import cn.hutool.core.io.IORuntimeException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.zip.Inflater;
/**
 * Wrapper around {@link java.util.zip.InflaterInputStream} that decompresses
 * "deflate" encoded content.<br>
 * The first two bytes of the wrapped stream are inspected to decide whether the
 * data carries a zlib (RFC 1950) header or is a raw deflate (RFC 1951) stream, and
 * the {@link Inflater} is configured accordingly.<br>
 * Modelled on org.apache.hc.client5.http.entity.DeflateInputStream.
 *
 * @author looly
 */
public class InflaterInputStream extends InputStream {

    /** The JDK inflating stream every read operation is delegated to. */
    private final java.util.zip.InflaterInputStream in;
    /**
     * The inflater handed to {@link #in}. The JDK stream only ends inflaters it created
     * itself, so this one has to be released explicitly in {@link #close()}.
     */
    private final Inflater inflater;
    /** Set once {@link #close()} has run, making repeated calls a no-op. */
    private boolean closed;

    /**
     * Creates the stream with a buffer size of 512 bytes.
     *
     * @param wrapped the compressed stream to wrap
     */
    public InflaterInputStream(final InputStream wrapped) {
        this(wrapped, 512);
    }

    /**
     * Creates the stream, peeking at the first two bytes of {@code wrapped} to detect
     * whether a zlib header is present.
     *
     * @param wrapped the compressed stream to wrap
     * @param size    buffer size passed to the underlying {@link java.util.zip.InflaterInputStream}
     * @throws IORuntimeException if the wrapped stream ends before two bytes could be read,
     *                            or if reading the header fails
     */
    public InflaterInputStream(final InputStream wrapped, final int size) {
        // Two bytes of push-back are enough to peek at the header and then replay it.
        final PushbackInputStream pushback = new PushbackInputStream(wrapped, 2);
        final int i1, i2;
        try {
            i1 = pushback.read();
            i2 = pushback.read();
            if (i1 == -1 || i2 == -1) {
                throw new IORuntimeException("Unexpected end of stream");
            }
            // Unread in reverse order so the bytes are replayed as i1, i2.
            pushback.unread(i2);
            pushback.unread(i1);
        } catch (final IOException e) {
            throw new IORuntimeException(e);
        }

        // Default to raw deflate; switch to zlib-wrapped only when the header checks out.
        boolean nowrap = true;
        final int b1 = i1 & 0xFF;
        final int compressionMethod = b1 & 0xF;
        final int compressionInfo = b1 >> 4 & 0xF;
        final int b2 = i2 & 0xFF;
        // zlib header: method 8 (deflate), window info <= 7, and the 16-bit header is a multiple of 31.
        if (compressionMethod == 8 && compressionInfo <= 7 && ((b1 << 8) | b2) % 31 == 0) {
            nowrap = false;
        }

        final Inflater inf = new Inflater(nowrap);
        final java.util.zip.InflaterInputStream stream;
        try {
            stream = new java.util.zip.InflaterInputStream(pushback, inf, size);
        } catch (final RuntimeException e) {
            // e.g. a non-positive buffer size: release the inflater before propagating.
            inf.end();
            throw e;
        }
        this.inflater = inf;
        this.in = stream;
    }

    @Override
    public int read() throws IOException {
        return this.in.read();
    }

    @SuppressWarnings("NullableProblems")
    @Override
    public int read(final byte[] b) throws IOException {
        return in.read(b);
    }

    @SuppressWarnings("NullableProblems")
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        return in.read(b, off, len);
    }

    @Override
    public long skip(final long n) throws IOException {
        return in.skip(n);
    }

    @Override
    public int available() throws IOException {
        return in.available();
    }

    @Override
    public void mark(final int readLimit) {
        in.mark(readLimit);
    }

    @Override
    public void reset() throws IOException {
        in.reset();
    }

    @Override
    public boolean markSupported() {
        return in.markSupported();
    }

    /**
     * Closes the underlying stream and releases the {@link Inflater} created by the
     * constructor. Calling this method more than once has no further effect.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        closed = true;
        try {
            in.close();
        } finally {
            // The JDK stream does not end an inflater supplied by the caller.
            inflater.end();
        }
    }
}

View File

@ -22,6 +22,12 @@
<artifactId>hutool-core</artifactId> <artifactId>hutool-core</artifactId>
<version>${project.parent.version}</version> <version>${project.parent.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.1.3</version>
<scope>provided</scope>
</dependency>
<dependency> <dependency>
<groupId>javax.xml.soap</groupId> <groupId>javax.xml.soap</groupId>
<artifactId>javax.xml.soap-api</artifactId> <artifactId>javax.xml.soap-api</artifactId>

View File

@ -1,9 +1,11 @@
package cn.hutool.http; package cn.hutool.http;
import cn.hutool.core.compress.InflaterInputStream;
import cn.hutool.core.map.CaseInsensitiveMap; import cn.hutool.core.map.CaseInsensitiveMap;
import java.io.InputStream; import java.io.InputStream;
import java.util.Map; import java.util.Map;
import java.util.zip.GZIPInputStream;
/** /**
* 全局响应内容压缩解压器注册中心<br> * 全局响应内容压缩解压器注册中心<br>
@ -23,7 +25,12 @@ public enum GlobalCompressStreamRegister {
*/ */
private final Map<String, Class<? extends InputStream>> compressMap = new CaseInsensitiveMap<>(); private final Map<String, Class<? extends InputStream>> compressMap = new CaseInsensitiveMap<>();
/**
* 构造初始化默认的压缩算法
*/
GlobalCompressStreamRegister() { GlobalCompressStreamRegister() {
compressMap.put("gzip", GZIPInputStream.class);
compressMap.put("deflate", InflaterInputStream.class);
} }
/** /**

View File

@ -7,19 +7,17 @@ import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
/** /**
* HTTP输入流此流用于包装Http请求响应内容的流用于解析各种压缩分段的响应流内容 * HTTP输入流此流用于包装Http请求响应内容的流用于解析各种压缩分段的响应流内容
* *
* @author Looly * @author Looly
*
*/ */
public class HttpInputStream extends InputStream { public class HttpInputStream extends InputStream {
/** 原始流 */ /**
* 原始流
*/
private InputStream in; private InputStream in;
/** /**
@ -89,30 +87,17 @@ public class HttpInputStream extends InputStream {
// 在一些情况下返回的流为null此时提供状态码说明 // 在一些情况下返回的流为null此时提供状态码说明
if (null == this.in) { if (null == this.in) {
this.in = new ByteArrayInputStream(StrUtil.format("Error request, response status: {}", response.status).getBytes()); this.in = new ByteArrayInputStream(StrUtil.format("Error request, null response with status: {}", response.status).getBytes());
return; return;
} }
final String contentEncoding = response.contentEncoding(); final String contentEncoding = response.contentEncoding();
if (StrUtil.equalsIgnoreCase("gzip", contentEncoding) && false == (response.in instanceof GZIPInputStream)) { final Class<? extends InputStream> streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding);
// Accept-Encoding: gzip if (null != streamClass) {
try { try {
this.in = new GZIPInputStream(this.in); this.in = ConstructorUtil.newInstance(streamClass, this.in);
} catch (final IOException ignore) { } catch (final Exception ignore) {
// 在类似于Head等方法中无body返回此时GZIPInputStream构造会出现错误在此忽略此错误读取普通数据 // 对于构造错误的压缩算法跳过之
// ignore
}
} else if (StrUtil.equalsIgnoreCase("deflate", contentEncoding) && false == (this.in instanceof InflaterInputStream)) {
// Accept-Encoding: defalte
this.in = new InflaterInputStream(this.in, new Inflater(true));
} else{
final Class<? extends InputStream> streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding);
if(null != streamClass){
try {
this.in = ConstructorUtil.newInstance(streamClass, this.in);
} catch (final Exception ignore) {
// 对于构造错误的压缩算法跳过之
}
} }
} }
} }

View File

@ -1,4 +1,4 @@
package cn.hutool.http; package cn.hutool.http.html;
import cn.hutool.core.lang.Console; import cn.hutool.core.lang.Console;
import cn.hutool.core.map.SafeConcurrentHashMap; import cn.hutool.core.map.SafeConcurrentHashMap;

View File

@ -1,10 +1,12 @@
package cn.hutool.http; package cn.hutool.http.html;
import cn.hutool.core.regex.ReUtil; import cn.hutool.core.regex.ReUtil;
import cn.hutool.core.text.StrUtil; import cn.hutool.core.text.StrUtil;
import cn.hutool.core.text.escape.EscapeUtil; import cn.hutool.core.text.escape.EscapeUtil;
import cn.hutool.core.util.XmlUtil; import cn.hutool.core.util.XmlUtil;
import java.util.regex.Pattern;
/** /**
* HTML工具类 * HTML工具类
* *
@ -13,18 +15,23 @@ import cn.hutool.core.util.XmlUtil;
* 比如去掉指定标签例如广告栏等去除JS去掉样式等等这些操作都可以使用此工具类完成 * 比如去掉指定标签例如广告栏等去除JS去掉样式等等这些操作都可以使用此工具类完成
* *
* @author xiaoleilu * @author xiaoleilu
*
*/ */
public class HtmlUtil { public class HtmlUtil {
public static final String RE_HTML_MARK = "(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)"; /**
public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; * HTML标签正则
*/
public static final Pattern RE_HTML_MARK = Pattern.compile("(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)", Pattern.CASE_INSENSITIVE);
/**
* script标签正则
*/
public static final Pattern RE_SCRIPT = Pattern.compile("<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE);
private static final char[][] TEXT = new char[64][]; private static final char[][] TEXT = new char[64][];
static { static {
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {
TEXT[i] = new char[] { (char) i }; TEXT[i] = new char[]{(char) i};
} }
// special HTML characters // special HTML characters
@ -75,14 +82,24 @@ public class HtmlUtil {
* @return 清除标签后的文本 * @return 清除标签后的文本
*/ */
public static String cleanHtmlTag(final String content) { public static String cleanHtmlTag(final String content) {
return content.replaceAll(RE_HTML_MARK, ""); return ReUtil.replaceAll(content, RE_HTML_MARK, "");
}
/**
* 清除所有script标签包括内容
*
* @param content 文本
* @return 清除标签后的文本
*/
public static String removeScriptTag(final String content) {
return ReUtil.replaceAll(content, RE_SCRIPT, "");
} }
/** /**
* 清除指定HTML标签和被标签包围的内容<br> * 清除指定HTML标签和被标签包围的内容<br>
* 不区分大小写 * 不区分大小写
* *
* @param content 文本 * @param content 文本
* @param tagNames 要清除的标签 * @param tagNames 要清除的标签
* @return 去除标签后的文本 * @return 去除标签后的文本
*/ */
@ -94,7 +111,7 @@ public class HtmlUtil {
* 清除指定HTML标签不包括内容<br> * 清除指定HTML标签不包括内容<br>
* 不区分大小写 * 不区分大小写
* *
* @param content 文本 * @param content 文本
* @param tagNames 要清除的标签 * @param tagNames 要清除的标签
* @return 去除标签后的文本 * @return 去除标签后的文本
*/ */
@ -106,9 +123,9 @@ public class HtmlUtil {
* 清除指定HTML标签<br> * 清除指定HTML标签<br>
* 不区分大小写 * 不区分大小写
* *
* @param content 文本 * @param content 文本
* @param withTagContent 是否去掉被包含在标签中的内容 * @param withTagContent 是否去掉被包含在标签中的内容
* @param tagNames 要清除的标签 * @param tagNames 要清除的标签
* @return 去除标签后的文本 * @return 去除标签后的文本
*/ */
public static String removeHtmlTag(String content, final boolean withTagContent, final String... tagNames) { public static String removeHtmlTag(String content, final boolean withTagContent, final String... tagNames) {
@ -136,7 +153,7 @@ public class HtmlUtil {
* 去除HTML标签中的属性如果多个标签有相同属性都去除 * 去除HTML标签中的属性如果多个标签有相同属性都去除
* *
* @param content 文本 * @param content 文本
* @param attrs 属性名不区分大小写 * @param attrs 属性名不区分大小写
* @return 处理后的文本 * @return 处理后的文本
*/ */
public static String removeHtmlAttr(String content, final String... attrs) { public static String removeHtmlAttr(String content, final String... attrs) {
@ -156,7 +173,7 @@ public class HtmlUtil {
/** /**
* 去除指定标签的所有属性 * 去除指定标签的所有属性
* *
* @param content 内容 * @param content 内容
* @param tagNames 指定标签 * @param tagNames 指定标签
* @return 处理后的文本 * @return 处理后的文本
*/ */

View File

@ -0,0 +1,6 @@
/**
* HTML相关工具封装
*
* @author looly
*/
package cn.hutool.http.html;

View File

@ -9,6 +9,7 @@ import cn.hutool.http.server.action.RootAction;
import cn.hutool.http.server.filter.HttpFilter; import cn.hutool.http.server.filter.HttpFilter;
import cn.hutool.http.server.filter.SimpleFilter; import cn.hutool.http.server.filter.SimpleFilter;
import cn.hutool.http.server.handler.ActionHandler; import cn.hutool.http.server.handler.ActionHandler;
import com.sun.net.httpserver.Filter; import com.sun.net.httpserver.Filter;
import com.sun.net.httpserver.HttpContext; import com.sun.net.httpserver.HttpContext;
import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpExchange;

View File

@ -1,5 +1,6 @@
package cn.hutool.http; package cn.hutool.http;
import cn.hutool.http.html.HtmlUtil;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;

View File

@ -115,9 +115,11 @@ public class HttpRequestTest {
@Test @Test
@Ignore @Ignore
public void getDeflateTest() { public void getDeflateTest() {
final String res = HttpRequest.get("https://comment.bilibili.com/67573272.xml") final HttpResponse res = HttpRequest.get("https://comment.bilibili.com/67573272.xml")
.execute().body(); .header(Header.ACCEPT_ENCODING, "deflate")
Console.log(res); .execute();
Console.log(res.header(Header.CONTENT_ENCODING));
Console.log(res.body());
} }
@Test @Test