summaryrefslogtreecommitdiffstats
path: root/modules/markup/mdstripper/mdstripper.go
blob: d248944b6870aee5edba5f2b14f4d91f4f647b54 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package mdstripper

import (
	"bytes"
	"io"

	"github.com/russross/blackfriday/v2"
)

// MarkdownStripper extends blackfriday.Renderer
type MarkdownStripper struct {
	links     []string
	coallesce bool
	empty     bool
}

const (
	blackfridayExtensions = 0 |
		blackfriday.NoIntraEmphasis |
		blackfriday.Tables |
		blackfriday.FencedCode |
		blackfriday.Strikethrough |
		blackfriday.NoEmptyLineBeforeBlock |
		blackfriday.DefinitionLists |
		blackfriday.Footnotes |
		blackfriday.HeadingIDs |
		blackfriday.AutoHeadingIDs |
		// Not included in modules/markup/markdown/markdown.go;
		// required here to process inline links
		blackfriday.Autolink
)

// StripMarkdown parses markdown content by removing all markup and code blocks
//	in order to extract links and other references
func StripMarkdown(rawBytes []byte) (string, []string) {
	buf, links := StripMarkdownBytes(rawBytes)
	return string(buf), links
}

// StripMarkdownBytes parses markdown content by removing all markup and code blocks
//	in order to extract links and other references
func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
	stripper := &MarkdownStripper{
		links: make([]string, 0, 10),
		empty: true,
	}

	parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions))
	ast := parser.Parse(rawBytes)
	var buf bytes.Buffer
	stripper.RenderHeader(&buf, ast)
	ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
		return stripper.RenderNode(&buf, node, entering)
	})
	stripper.RenderFooter(&buf, ast)
	return buf.Bytes(), stripper.GetLinks()
}

// RenderNode is the main rendering method. It will be called once for
// every leaf node and twice for every non-leaf node (first with
// entering=true, then with entering=false). The method should write its
// rendition of the node to the supplied writer w.
func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
	if !entering {
		return blackfriday.GoToNext
	}
	switch node.Type {
	case blackfriday.Text:
		r.processString(w, node.Literal, node.Parent == nil)
		return blackfriday.GoToNext
	case blackfriday.Link:
		r.processLink(w, node.LinkData.Destination)
		r.coallesce = false
		return blackfriday.SkipChildren
	}
	r.coallesce = false
	return blackfriday.GoToNext
}

// RenderHeader is a method that allows the renderer to produce some
// content preceding the main body of the output document.
func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) {
}

// RenderFooter is a symmetric counterpart of RenderHeader.
func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) {
}

func (r *MarkdownStripper) doubleSpace(w io.Writer) {
	if !r.empty {
		_, _ = w.Write([]byte{'\n'})
	}
}

func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) {
	// Always break-up words
	if !coallesce || !r.coallesce {
		r.doubleSpace(w)
	}
	_, _ = w.Write(text)
	r.coallesce = coallesce
	r.empty = false
}

func (r *MarkdownStripper) processLink(w io.Writer, link []byte) {
	// Links are processed out of band
	r.links = append(r.links, string(link))
	r.coallesce = false
}

// GetLinks returns the list of link data collected while parsing
func (r *MarkdownStripper) GetLinks() []string {
	return r.links
}