mirror of
https://gitee.com/dromara/hutool.git
synced 2025-04-05 17:37:59 +08:00
fix(FileTypeUtil): 重构根据file magic number判断文件类型
1.重构多个Magic Number 2.根据单独类型匹配,修复某些文件跳位置对比 3.获取到文件Mime类型和后缀 4.获取文件流从28byte提升到64byte 5.添加精确匹配如docx、xlsx、pptx、doc、xls、ppt,精确匹配为8192byte(大小10k文件左右) 6.添加file magic number枚举 Closes https://github.com/dromara/hutool/issues/2821
This commit is contained in:
parent
988e59eafd
commit
d67e1c567d
1238
hutool-core/src/main/java/cn/hutool/core/io/FileMagicNumber.java
Normal file
1238
hutool-core/src/main/java/cn/hutool/core/io/FileMagicNumber.java
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,9 @@
|
||||
package cn.hutool.core.io;
|
||||
|
||||
import cn.hutool.core.util.HexUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.*;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.concurrent.ConcurrentSkipListMap;
|
||||
@ -20,71 +19,7 @@ import java.util.concurrent.ConcurrentSkipListMap;
|
||||
*/
|
||||
public class FileTypeUtil {
|
||||
|
||||
private static final Map<String, String> FILE_TYPE_MAP;
|
||||
|
||||
static {
|
||||
FILE_TYPE_MAP = new ConcurrentSkipListMap<>((s1, s2) -> {
|
||||
int len1 = s1.length();
|
||||
int len2 = s2.length();
|
||||
if (len1 == len2) {
|
||||
return s1.compareTo(s2);
|
||||
} else {
|
||||
return len2 - len1;
|
||||
}
|
||||
});
|
||||
|
||||
FILE_TYPE_MAP.put("ffd8ff", "jpg"); // JPEG (jpg)
|
||||
FILE_TYPE_MAP.put("89504e47", "png"); // PNG (png)
|
||||
FILE_TYPE_MAP.put("4749463837", "gif"); // GIF (gif)
|
||||
FILE_TYPE_MAP.put("4749463839", "gif"); // GIF (gif)
|
||||
FILE_TYPE_MAP.put("49492a00227105008037", "tif"); // TIFF (tif)
|
||||
// https://github.com/sindresorhus/file-type/blob/main/core.js#L90
|
||||
FILE_TYPE_MAP.put("424d", "bmp"); // 位图(bmp)
|
||||
FILE_TYPE_MAP.put("41433130313500000000", "dwg"); // CAD (dwg)
|
||||
FILE_TYPE_MAP.put("7b5c727466315c616e73", "rtf"); // Rich Text Format (rtf)
|
||||
FILE_TYPE_MAP.put("38425053000100000000", "psd"); // Photoshop (psd)
|
||||
FILE_TYPE_MAP.put("46726f6d3a203d3f6762", "eml"); // Email [Outlook Express 6] (eml)
|
||||
FILE_TYPE_MAP.put("5374616E64617264204A", "mdb"); // MS Access (mdb)
|
||||
FILE_TYPE_MAP.put("252150532D41646F6265", "ps");
|
||||
FILE_TYPE_MAP.put("255044462d312e", "pdf"); // Adobe Acrobat (pdf)
|
||||
FILE_TYPE_MAP.put("2e524d46000000120001", "rmvb"); // rmvb/rm相同
|
||||
FILE_TYPE_MAP.put("464c5601050000000900", "flv"); // flv与f4v相同
|
||||
FILE_TYPE_MAP.put("0000001C66747970", "mp4");
|
||||
FILE_TYPE_MAP.put("00000020667479706", "mp4");
|
||||
FILE_TYPE_MAP.put("00000018667479706D70", "mp4");
|
||||
FILE_TYPE_MAP.put("49443303000000002176", "mp3");
|
||||
FILE_TYPE_MAP.put("000001ba210001000180", "mpg"); //
|
||||
FILE_TYPE_MAP.put("3026b2758e66cf11a6d9", "wmv"); // wmv与asf相同
|
||||
FILE_TYPE_MAP.put("52494646e27807005741", "wav"); // Wave (wav)
|
||||
FILE_TYPE_MAP.put("52494646d07d60074156", "avi");
|
||||
FILE_TYPE_MAP.put("4d546864000000060001", "mid"); // MIDI (mid)
|
||||
FILE_TYPE_MAP.put("526172211a0700cf9073", "rar"); // WinRAR
|
||||
FILE_TYPE_MAP.put("235468697320636f6e66", "ini");
|
||||
FILE_TYPE_MAP.put("504B03040a0000000000", "jar");
|
||||
FILE_TYPE_MAP.put("504B0304140008000800", "jar");
|
||||
// MS Excel 注意:word、msi 和 excel的文件头一样
|
||||
FILE_TYPE_MAP.put("d0cf11e0a1b11ae10", "xls");
|
||||
FILE_TYPE_MAP.put("504B0304", "zip");
|
||||
FILE_TYPE_MAP.put("4d5a9000030000000400", "exe"); // 可执行文件
|
||||
FILE_TYPE_MAP.put("3c25402070616765206c", "jsp"); // jsp文件
|
||||
FILE_TYPE_MAP.put("4d616e69666573742d56", "mf"); // MF文件
|
||||
FILE_TYPE_MAP.put("7061636b616765207765", "java"); // java文件
|
||||
FILE_TYPE_MAP.put("406563686f206f66660d", "bat"); // bat文件
|
||||
FILE_TYPE_MAP.put("1f8b0800000000000000", "gz"); // gz文件
|
||||
FILE_TYPE_MAP.put("cafebabe0000002e0041", "class"); // class文件
|
||||
FILE_TYPE_MAP.put("49545346030000006000", "chm"); // chm文件
|
||||
FILE_TYPE_MAP.put("04000000010000001300", "mxp"); // mxp文件
|
||||
FILE_TYPE_MAP.put("6431303a637265617465", "torrent");
|
||||
FILE_TYPE_MAP.put("6D6F6F76", "mov"); // Quicktime (mov)
|
||||
FILE_TYPE_MAP.put("FF575043", "wpd"); // WordPerfect (wpd)
|
||||
FILE_TYPE_MAP.put("CFAD12FEC5FD746F", "dbx"); // Outlook Express (dbx)
|
||||
FILE_TYPE_MAP.put("2142444E", "pst"); // Outlook (pst)
|
||||
FILE_TYPE_MAP.put("AC9EBD8F", "qdf"); // Quicken (qdf)
|
||||
FILE_TYPE_MAP.put("E3828596", "pwl"); // Windows Password (pwl)
|
||||
FILE_TYPE_MAP.put("2E7261FD", "ram"); // Real Audio (ram)
|
||||
// https://stackoverflow.com/questions/45321665/magic-number-for-google-image-format
|
||||
FILE_TYPE_MAP.put("52494646", "webp");
|
||||
}
|
||||
private static final Map<String, String> FILE_TYPE_MAP = new ConcurrentSkipListMap<>();
|
||||
|
||||
/**
|
||||
* 增加文件类型映射<br>
|
||||
@ -120,26 +55,51 @@ public class FileTypeUtil {
|
||||
return fileTypeEntry.getValue();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
byte[] bytes = (HexUtil.decodeHex(fileStreamHexHead));
|
||||
return FileMagicNumber.getMagicNumber(bytes).getExtension();
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件流的头部信息获得文件类型
|
||||
*
|
||||
* @param in 文件流
|
||||
* @param fileHeadSize 自定义读取文件头部的大小
|
||||
* @return 文件类型,未找到为{@code null}
|
||||
*/
|
||||
public static String getType(InputStream in,int fileHeadSize) throws IORuntimeException {
|
||||
return getType((IoUtil.readHex(in, fileHeadSize,false)));
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件流的头部信息获得文件类型<br>
|
||||
* 注意此方法会读取头部28个bytes,造成此流接下来读取时缺少部分bytes<br>
|
||||
* 注意此方法会读取头部一些bytes,造成此流接下来读取时缺少部分bytes<br>
|
||||
* 因此如果想复用此流,流需支持{@link InputStream#reset()}方法。
|
||||
*
|
||||
* @param in {@link InputStream}
|
||||
* @param isExact 是否精确匹配,如果为false,使用前64个bytes匹配,如果为true,使用前8192bytes匹配
|
||||
* @return 类型,文件的扩展名,未找到为{@code null}
|
||||
* @throws IORuntimeException 读取流引起的异常
|
||||
* @throws IORuntimeException 读取流引起的异常
|
||||
*/
|
||||
public static String getType(InputStream in) throws IORuntimeException {
|
||||
return getType(IoUtil.readHex28Upper(in));
|
||||
public static String getType(InputStream in,boolean isExact) throws IORuntimeException {
|
||||
return isExact
|
||||
?getType(IoUtil.readHex8192Upper(in))
|
||||
:getType(IoUtil.readHex64Upper(in));
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件流的头部信息获得文件类型<br>
|
||||
* 注意此方法会读取头部64个bytes,造成此流接下来读取时缺少部分bytes<br>
|
||||
* 因此如果想服用此流,流需支持{@link InputStream#reset()}方法。
|
||||
* @param in {@link InputStream}
|
||||
* @return 类型,文件的扩展名,未找到为{@code null}
|
||||
* @throws IORuntimeException 读取流引起的异常
|
||||
*/
|
||||
public static String getType(InputStream in) throws IORuntimeException {
|
||||
return getType(in,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件流的头部信息获得文件类型
|
||||
* 注意此方法会读取头部28个bytes,造成此流接下来读取时缺少部分bytes<br>
|
||||
* 注意此方法会读取头部64个bytes,造成此流接下来读取时缺少部分bytes<br>
|
||||
* 因此如果想复用此流,流需支持{@link InputStream#reset()}方法。
|
||||
*
|
||||
* <pre>
|
||||
@ -151,24 +111,33 @@ public class FileTypeUtil {
|
||||
* @param in {@link InputStream}
|
||||
* @param filename 文件名
|
||||
* @return 类型,文件的扩展名,未找到为{@code null}
|
||||
* @throws IORuntimeException 读取流引起的异常
|
||||
* @throws IORuntimeException 读取流引起的异常
|
||||
*/
|
||||
public static String getType(InputStream in, String filename) {
|
||||
String typeName = getType(in);
|
||||
public static String getType(InputStream in, String filename) throws IORuntimeException {
|
||||
return getType(in,filename,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件流的头部信息获得文件类型
|
||||
* 注意此方法会读取头部一些bytes,造成此流接下来读取时缺少部分bytes<br>
|
||||
* 因此如果想复用此流,流需支持{@link InputStream#reset()}方法。
|
||||
*
|
||||
* <pre>
|
||||
* 1、无法识别类型默认按照扩展名识别
|
||||
* 2、xls、doc、msi头信息无法区分,按照扩展名区分
|
||||
* 3、zip可能为docx、xlsx、pptx、jar、war、ofd头信息无法区分,按照扩展名区分
|
||||
* </pre>
|
||||
* @param in {@link InputStream}
|
||||
* @param filename 文件名
|
||||
* @param isExact 是否精确匹配,如果为false,使用前64个bytes匹配,如果为true,使用前8192bytes匹配
|
||||
* @return 类型,文件的扩展名,未找到为{@code null}
|
||||
* @throws IORuntimeException 读取流引起的异常
|
||||
*/
|
||||
public static String getType(InputStream in, String filename,boolean isExact) throws IORuntimeException {
|
||||
String typeName = getType(in,isExact);
|
||||
if (null == typeName) {
|
||||
// 未成功识别类型,扩展名辅助识别
|
||||
typeName = FileUtil.extName(filename);
|
||||
} else if ("xls".equals(typeName)) {
|
||||
// xls、doc、msi的头一样,使用扩展名辅助判断
|
||||
final String extName = FileUtil.extName(filename);
|
||||
if ("doc".equalsIgnoreCase(extName)) {
|
||||
typeName = "doc";
|
||||
} else if ("msi".equalsIgnoreCase(extName)) {
|
||||
typeName = "msi";
|
||||
} else if ("ppt".equalsIgnoreCase(extName)) {
|
||||
typeName = "ppt";
|
||||
}
|
||||
} else if ("zip".equals(typeName)) {
|
||||
// zip可能为docx、xlsx、pptx、jar、war、ofd等格式,扩展名辅助判断
|
||||
final String extName = FileUtil.extName(filename);
|
||||
@ -213,21 +182,51 @@ public class FileTypeUtil {
|
||||
* <pre>
|
||||
* 1、无法识别类型默认按照扩展名识别
|
||||
* 2、xls、doc、msi头信息无法区分,按照扩展名区分
|
||||
* 3、zip可能为docx、xlsx、pptx、jar、war头信息无法区分,按照扩展名区分
|
||||
* 3、zip可能为jar、war头信息无法区分,按照扩展名区分
|
||||
* </pre>
|
||||
*
|
||||
* @param file 文件 {@link File}
|
||||
* @param isExact 是否精确匹配,如果为false,使用前64个bytes匹配,如果为true,使用前8192bytes匹配
|
||||
* @return 类型,文件的扩展名,未找到为{@code null}
|
||||
* @throws IORuntimeException 读取文件引起的异常
|
||||
*/
|
||||
public static String getType(File file,boolean isExact) throws IORuntimeException {
|
||||
FileInputStream in = null;
|
||||
try {
|
||||
in = IoUtil.toStream(file);
|
||||
return getType(in, file.getName(),isExact);
|
||||
} finally {
|
||||
IoUtil.close(in);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件流的头部信息获得文件类型
|
||||
*
|
||||
* <pre>
|
||||
* 1、无法识别类型默认按照扩展名识别
|
||||
* 2、xls、doc、msi头信息无法区分,按照扩展名区分
|
||||
* 3、zip可能为jar、war头信息无法区分,按照扩展名区分
|
||||
* </pre>
|
||||
*
|
||||
* @param file 文件 {@link File}
|
||||
* @return 类型,文件的扩展名,未找到为{@code null}
|
||||
* @throws IORuntimeException 读取文件引起的异常
|
||||
* @throws IORuntimeException 读取文件引起的异常
|
||||
*/
|
||||
public static String getType(File file) throws IORuntimeException {
|
||||
FileInputStream in = null;
|
||||
try {
|
||||
in = IoUtil.toStream(file);
|
||||
return getType(in, file.getName());
|
||||
} finally {
|
||||
IoUtil.close(in);
|
||||
}
|
||||
public static String getType(File file) throws IORuntimeException {
|
||||
return getType(file,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* 通过路径获得文件类型
|
||||
*
|
||||
* @param path 路径,绝对路径或相对ClassPath的路径
|
||||
* @param isExact 是否精确匹配,如果为false,使用前64个bytes匹配,如果为true,使用前8192bytes匹配
|
||||
* @return 类型
|
||||
* @throws IORuntimeException 读取文件引起的异常
|
||||
*/
|
||||
public static String getTypeByPath(String path,boolean isExact) throws IORuntimeException {
|
||||
return getType(FileUtil.file(path),isExact);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -235,9 +234,11 @@ public class FileTypeUtil {
|
||||
*
|
||||
* @param path 路径,绝对路径或相对ClassPath的路径
|
||||
* @return 类型
|
||||
* @throws IORuntimeException 读取文件引起的异常
|
||||
* @throws IORuntimeException 读取文件引起的异常
|
||||
*/
|
||||
public static String getTypeByPath(String path) throws IORuntimeException {
|
||||
return getType(FileUtil.file(path));
|
||||
public static String getTypeByPath(String path) throws IORuntimeException {
|
||||
return getTypeByPath(path,false);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -530,25 +530,41 @@ public class IoUtil extends NioUtil {
|
||||
}
|
||||
|
||||
/**
|
||||
* 从流中读取前28个byte并转换为16进制,字母部分使用大写
|
||||
* 从流中读取前64个byte并转换为16进制,字母部分使用大写
|
||||
*
|
||||
* @param in {@link InputStream}
|
||||
* @return 16进制字符串
|
||||
* @throws IORuntimeException IO异常
|
||||
*/
|
||||
public static String readHex28Upper(InputStream in) throws IORuntimeException {
|
||||
return readHex(in, 28, false);
|
||||
public static String readHex64Upper(InputStream in) throws IORuntimeException {
|
||||
return readHex(in, 64, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* 从流中读取前28个byte并转换为16进制,字母部分使用小写
|
||||
* 从流中读取前8192个byte并转换为16进制,字母部分使用大写
|
||||
*
|
||||
* @param in {@link InputStream}
|
||||
* @return 16进制字符串
|
||||
* @throws IORuntimeException IO异常
|
||||
*/
|
||||
public static String readHex28Lower(InputStream in) throws IORuntimeException {
|
||||
return readHex(in, 28, true);
|
||||
public static String readHex8192Upper(InputStream in) throws IORuntimeException {
|
||||
try {
|
||||
int i = in.available();
|
||||
return readHex(in, Math.min(8192, in.available()), false);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从流中读取前64个byte并转换为16进制,字母部分使用小写
|
||||
*
|
||||
* @param in {@link InputStream}
|
||||
* @return 16进制字符串
|
||||
* @throws IORuntimeException IO异常
|
||||
*/
|
||||
public static String readHex64Lower(InputStream in) throws IORuntimeException {
|
||||
return readHex(in, 64, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -48,7 +48,7 @@ public class FileTypeUtilTest {
|
||||
@Ignore
|
||||
public void ofdTest() {
|
||||
File file = FileUtil.file("e:/test.ofd");
|
||||
String hex = IoUtil.readHex28Upper(FileUtil.getInputStream(file));
|
||||
String hex = IoUtil.readHex64Upper(FileUtil.getInputStream(file));
|
||||
Console.log(hex);
|
||||
String type = FileTypeUtil.getType(file);
|
||||
Console.log(type);
|
||||
|
Loading…
Reference in New Issue
Block a user