1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package mdstripper
import (
"bytes"
"sync"
"io"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup/common"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
)
type stripRenderer struct {
links []string
empty bool
}
func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error {
return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkContinue, nil
}
switch v := n.(type) {
case *ast.Text:
if !v.IsRaw() {
_, prevSibIsText := n.PreviousSibling().(*ast.Text)
coalesce := prevSibIsText
r.processString(
w,
v.Text(source),
coalesce)
if v.SoftLineBreak() {
r.doubleSpace(w)
}
}
return ast.WalkContinue, nil
case *ast.Link:
r.processLink(w, v.Destination)
return ast.WalkSkipChildren, nil
case *ast.AutoLink:
r.processLink(w, v.URL(source))
return ast.WalkSkipChildren, nil
}
return ast.WalkContinue, nil
})
}
func (r *stripRenderer) doubleSpace(w io.Writer) {
if !r.empty {
_, _ = w.Write([]byte{'\n'})
}
}
func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) {
// Always break-up words
if !coalesce {
r.doubleSpace(w)
}
_, _ = w.Write(text)
r.empty = false
}
func (r *stripRenderer) processLink(w io.Writer, link []byte) {
// Links are processed out of band
r.links = append(r.links, string(link))
}
// GetLinks returns the list of link data collected while parsing
func (r *stripRenderer) GetLinks() []string {
return r.links
}
// AddOptions adds given option to this renderer.
func (r *stripRenderer) AddOptions(...renderer.Option) {
// no-op
}
// StripMarkdown parses markdown content by removing all markup and code blocks
// in order to extract links and other references
func StripMarkdown(rawBytes []byte) (string, []string) {
buf, links := StripMarkdownBytes(rawBytes)
return string(buf), links
}
var stripParser parser.Parser
var once = sync.Once{}
// StripMarkdownBytes parses markdown content by removing all markup and code blocks
// in order to extract links and other references
func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
once.Do(func() {
gdMarkdown := goldmark.New(
goldmark.WithExtensions(extension.Table,
extension.Strikethrough,
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
common.Linkify,
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
),
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
)
stripParser = gdMarkdown.Parser()
})
stripper := &stripRenderer{
links: make([]string, 0, 10),
empty: true,
}
reader := text.NewReader(rawBytes)
doc := stripParser.Parse(reader)
var buf bytes.Buffer
if err := stripper.Render(&buf, rawBytes, doc); err != nil {
log.Error("Unable to strip: %v", err)
}
return buf.Bytes(), stripper.GetLinks()
}
|