diff options
author | zeripath <art27@cantab.net> | 2019-12-31 01:53:28 +0000 |
---|---|---|
committer | Lauris BH <lauris@nix.lv> | 2019-12-31 03:53:28 +0200 |
commit | 27757714d0420192e6139d1e4206446dcefe6531 (patch) | |
tree | d237deeb48315f1465dbd915b65f95973c4cbbe9 /modules/markup/mdstripper | |
parent | 0c07f1de5b8cf7b9f9f607aae76a53c99aeb2c04 (diff) | |
download | gitea-27757714d0420192e6139d1e4206446dcefe6531.tar.gz gitea-27757714d0420192e6139d1e4206446dcefe6531.zip |
Change markdown rendering from blackfriday to goldmark (#9533)
* Move to goldmark
Markdown rendering moved from blackfriday to goldmark.
Multiple subtle changes were required to the goldmark extensions to keep
the current rendering and defaults.
Can go further with goldmark linkify and have this work within markdown
rendering, making the link processor unnecessary.
Need to think about how to go about allowing extensions - at present it
seems that these would be hard to do without recompilation.
* linter fixes
Co-authored-by: Lauris BH <lauris@nix.lv>
Diffstat (limited to 'modules/markup/mdstripper')
-rw-r--r-- | modules/markup/mdstripper/mdstripper.go | 177 | ||||
-rw-r--r-- | modules/markup/mdstripper/mdstripper_test.go | 14 |
2 files changed, 110 insertions, 81 deletions
diff --git a/modules/markup/mdstripper/mdstripper.go b/modules/markup/mdstripper/mdstripper.go index d248944b68..9d05ee3969 100644 --- a/modules/markup/mdstripper/mdstripper.go +++ b/modules/markup/mdstripper/mdstripper.go @@ -6,113 +6,128 @@ package mdstripper import ( "bytes" - "io" + "sync" - "github.com/russross/blackfriday/v2" -) + "io" -// MarkdownStripper extends blackfriday.Renderer -type MarkdownStripper struct { - links []string - coallesce bool - empty bool -} + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/markup/common" -const ( - blackfridayExtensions = 0 | - blackfriday.NoIntraEmphasis | - blackfriday.Tables | - blackfriday.FencedCode | - blackfriday.Strikethrough | - blackfriday.NoEmptyLineBeforeBlock | - blackfriday.DefinitionLists | - blackfriday.Footnotes | - blackfriday.HeadingIDs | - blackfriday.AutoHeadingIDs | - // Not included in modules/markup/markdown/markdown.go; - // required here to process inline links - blackfriday.Autolink + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" ) -// StripMarkdown parses markdown content by removing all markup and code blocks -// in order to extract links and other references -func StripMarkdown(rawBytes []byte) (string, []string) { - buf, links := StripMarkdownBytes(rawBytes) - return string(buf), links +type stripRenderer struct { + links []string + empty bool } -// StripMarkdownBytes parses markdown content by removing all markup and code blocks -// in order to extract links and other references -func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { - stripper := &MarkdownStripper{ - links: make([]string, 0, 10), - empty: true, - } - - parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions)) - ast := parser.Parse(rawBytes) 
- var buf bytes.Buffer - stripper.RenderHeader(&buf, ast) - ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - return stripper.RenderNode(&buf, node, entering) +func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { + return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + switch v := n.(type) { + case *ast.Text: + if !v.IsRaw() { + _, prevSibIsText := n.PreviousSibling().(*ast.Text) + coalesce := prevSibIsText + r.processString( + w, + v.Text(source), + coalesce) + if v.SoftLineBreak() { + r.doubleSpace(w) + } + } + return ast.WalkContinue, nil + case *ast.Link: + r.processLink(w, v.Destination) + return ast.WalkSkipChildren, nil + case *ast.AutoLink: + r.processLink(w, v.URL(source)) + return ast.WalkSkipChildren, nil + } + return ast.WalkContinue, nil }) - stripper.RenderFooter(&buf, ast) - return buf.Bytes(), stripper.GetLinks() -} - -// RenderNode is the main rendering method. It will be called once for -// every leaf node and twice for every non-leaf node (first with -// entering=true, then with entering=false). The method should write its -// rendition of the node to the supplied writer w. -func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if !entering { - return blackfriday.GoToNext - } - switch node.Type { - case blackfriday.Text: - r.processString(w, node.Literal, node.Parent == nil) - return blackfriday.GoToNext - case blackfriday.Link: - r.processLink(w, node.LinkData.Destination) - r.coallesce = false - return blackfriday.SkipChildren - } - r.coallesce = false - return blackfriday.GoToNext -} - -// RenderHeader is a method that allows the renderer to produce some -// content preceding the main body of the output document. 
-func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) { } -// RenderFooter is a symmetric counterpart of RenderHeader. -func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) { -} - -func (r *MarkdownStripper) doubleSpace(w io.Writer) { +func (r *stripRenderer) doubleSpace(w io.Writer) { if !r.empty { _, _ = w.Write([]byte{'\n'}) } } -func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) { +func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { // Always break-up words - if !coallesce || !r.coallesce { + if !coalesce { r.doubleSpace(w) } _, _ = w.Write(text) - r.coallesce = coallesce r.empty = false } -func (r *MarkdownStripper) processLink(w io.Writer, link []byte) { +func (r *stripRenderer) processLink(w io.Writer, link []byte) { // Links are processed out of band r.links = append(r.links, string(link)) - r.coallesce = false } // GetLinks returns the list of link data collected while parsing -func (r *MarkdownStripper) GetLinks() []string { +func (r *stripRenderer) GetLinks() []string { return r.links } + +// AddOptions adds given option to this renderer. 
+func (r *stripRenderer) AddOptions(...renderer.Option) { + // no-op +} + +// StripMarkdown parses markdown content by removing all markup and code blocks +// in order to extract links and other references +func StripMarkdown(rawBytes []byte) (string, []string) { + buf, links := StripMarkdownBytes(rawBytes) + return string(buf), links +} + +var stripParser parser.Parser +var once = sync.Once{} + +// StripMarkdownBytes parses markdown content by removing all markup and code blocks +// in order to extract links and other references +func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { + once.Do(func() { + gdMarkdown := goldmark.New( + goldmark.WithExtensions(extension.Table, + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + common.Linkify, + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + ), + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + ) + stripParser = gdMarkdown.Parser() + }) + stripper := &stripRenderer{ + links: make([]string, 0, 10), + empty: true, + } + reader := text.NewReader(rawBytes) + doc := stripParser.Parse(reader) + var buf bytes.Buffer + if err := stripper.Render(&buf, rawBytes, doc); err != nil { + log.Error("Unable to strip: %v", err) + } + return buf.Bytes(), stripper.GetLinks() +} diff --git a/modules/markup/mdstripper/mdstripper_test.go b/modules/markup/mdstripper/mdstripper_test.go index 157fe1975b..9efcc35949 100644 --- a/modules/markup/mdstripper/mdstripper_test.go +++ b/modules/markup/mdstripper/mdstripper_test.go @@ -53,6 +53,20 @@ A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE. []string{ "link", }}, + { + "Simply closes: #29 yes", + []string{ + "Simply closes: #29 yes", + }, + []string{}, + }, + { + "Simply closes: !29 yes", + []string{ + "Simply closes: !29 yes", + }, + []string{}, + }, } for _, test := range list { |