mindoc/utils/html.go
Go-Go-Farther c8f7a2a544
修复附件不能下载问题
fixes #850
1. 修复文档为空附件不能下载
2. 修复文档设置为非markdown编辑附件不能下载
3. 兼容现存问题文档
4. 非markdown编辑文档,阅读时增加文档修改信息
2023-06-07 10:04:11 +08:00

129 lines
3.5 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package utils
import (
"bytes"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/mindoc-org/mindoc/conf"
)
func StripTags(s string) string {
//将HTML标签全转换成小写
re, _ := regexp.Compile("\\<[\\S\\s]+?\\>")
src := re.ReplaceAllStringFunc(s, strings.ToLower)
//去除STYLE
re, _ = regexp.Compile("\\<style[\\S\\s]+?\\</style\\>")
src = re.ReplaceAllString(src, "")
//去除SCRIPT
re, _ = regexp.Compile("\\<script[\\S\\s]+?\\</script\\>")
src = re.ReplaceAllString(src, "")
//去除所有尖括号内的HTML代码并换成换行符
re, _ = regexp.Compile("\\<[\\S\\s]+?\\>")
src = re.ReplaceAllString(src, "\n")
//去除连续的换行符
re, _ = regexp.Compile("\\s{2,}")
src = re.ReplaceAllString(src, "\n")
return src
}
//自动提取文章摘要
func AutoSummary(body string, l int) string {
//匹配图片,如果图片语法是在代码块中,这里同样会处理
re := regexp.MustCompile(`<p>(.*?)</p>`)
contents := re.FindAllString(body, -1)
if len(contents) <= 0 {
return ""
}
content := ""
for _, s := range contents {
b := strings.Replace(StripTags(s), "\n", "", -1)
if l <= 0 {
break
}
l = l - len([]rune(b))
content += b
}
return content
}
//安全处理HTML文档过滤危险标签和属性.
func SafetyProcessor(html string) string {
//安全过滤,移除危险标签和属性
if docQuery, err := goquery.NewDocumentFromReader(bytes.NewBufferString(html)); err == nil {
docQuery.Find("script").Remove()
docQuery.Find("form").Remove()
docQuery.Find("link").Remove()
docQuery.Find("applet").Remove()
docQuery.Find("frame").Remove()
docQuery.Find("meta").Remove()
if !conf.GetEnableIframe() {
docQuery.Find("iframe").Remove()
}
docQuery.Find("*").Each(func(i int, selection *goquery.Selection) {
if href, ok := selection.Attr("href"); ok && strings.HasPrefix(href, "javascript:") {
selection.SetAttr("href", "#")
}
if src, ok := selection.Attr("src"); ok && strings.HasPrefix(src, "javascript:") {
selection.SetAttr("src", "#")
}
selection.RemoveAttr("onafterprint").
RemoveAttr("onbeforeprint").
RemoveAttr("onbeforeunload").
RemoveAttr("onload").
RemoveAttr("onclick").
RemoveAttr("onkeydown").
RemoveAttr("onkeypress").
RemoveAttr("onkeyup").
RemoveAttr("ondblclick").
RemoveAttr("onmousedown").
RemoveAttr("onmousemove").
RemoveAttr("onmouseout").
RemoveAttr("onmouseover").
RemoveAttr("onmouseup")
})
//处理外链
docQuery.Find("a").Each(func(i int, contentSelection *goquery.Selection) {
if src, ok := contentSelection.Attr("href"); ok {
if strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://") {
if conf.BaseUrl != "" && !strings.HasPrefix(src, conf.BaseUrl) {
contentSelection.SetAttr("target", "_blank")
}
}
}
})
//添加文档标签包裹
if selector := docQuery.Find("div.whole-article-wrap").First(); selector.Size() <= 0 {
docQuery.Find("body").Children().WrapAllHtml("<div class=\"whole-article-wrap\"></div>")
}
//解决文档内容缺少包裹标签的问题
if selector := docQuery.Find("div.markdown-article").First(); selector.Size() <= 0 {
if selector := docQuery.Find("div.markdown-toc").First(); selector.Size() > 0 {
docQuery.Find("div.markdown-toc").NextAll().WrapAllHtml("<div class=\"markdown-article\"></div>")
}
}
if html, err := docQuery.Html(); err == nil {
return strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(html), "<html><head></head><body>"), "</body></html>")
}
}
return html
}