From 27757714d0420192e6139d1e4206446dcefe6531 Mon Sep 17 00:00:00 2001 From: zeripath Date: Tue, 31 Dec 2019 01:53:28 +0000 Subject: Change markdown rendering from blackfriday to goldmark (#9533) * Move to goldmark Markdown rendering moved from blackfriday to the goldmark. Multiple subtle changes required to the goldmark extensions to keep current rendering and defaults. Can go further with goldmark linkify and have this work within markdown rendering making the link processor unnecessary. Need to think about how to go about allowing extensions - at present it seems that these would be hard to do without recompilation. * linter fixes Co-authored-by: Lauris BH --- modules/markup/mdstripper/mdstripper.go | 177 +++++++++++++++------------ modules/markup/mdstripper/mdstripper_test.go | 14 +++ 2 files changed, 110 insertions(+), 81 deletions(-) (limited to 'modules/markup/mdstripper') diff --git a/modules/markup/mdstripper/mdstripper.go b/modules/markup/mdstripper/mdstripper.go index d248944b68..9d05ee3969 100644 --- a/modules/markup/mdstripper/mdstripper.go +++ b/modules/markup/mdstripper/mdstripper.go @@ -6,113 +6,128 @@ package mdstripper import ( "bytes" - "io" + "sync" - "github.com/russross/blackfriday/v2" -) + "io" -// MarkdownStripper extends blackfriday.Renderer -type MarkdownStripper struct { - links []string - coallesce bool - empty bool -} + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/markup/common" -const ( - blackfridayExtensions = 0 | - blackfriday.NoIntraEmphasis | - blackfriday.Tables | - blackfriday.FencedCode | - blackfriday.Strikethrough | - blackfriday.NoEmptyLineBeforeBlock | - blackfriday.DefinitionLists | - blackfriday.Footnotes | - blackfriday.HeadingIDs | - blackfriday.AutoHeadingIDs | - // Not included in modules/markup/markdown/markdown.go; - // required here to process inline links - blackfriday.Autolink + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" ) -// StripMarkdown parses markdown content by removing all markup and code blocks -// in order to extract links and other references -func StripMarkdown(rawBytes []byte) (string, []string) { - buf, links := StripMarkdownBytes(rawBytes) - return string(buf), links +type stripRenderer struct { + links []string + empty bool } -// StripMarkdownBytes parses markdown content by removing all markup and code blocks -// in order to extract links and other references -func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { - stripper := &MarkdownStripper{ - links: make([]string, 0, 10), - empty: true, - } - - parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions)) - ast := parser.Parse(rawBytes) - var buf bytes.Buffer - stripper.RenderHeader(&buf, ast) - ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - return stripper.RenderNode(&buf, node, entering) +func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { + return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + switch v := n.(type) { + case *ast.Text: + if !v.IsRaw() { + _, prevSibIsText := n.PreviousSibling().(*ast.Text) + coalesce := prevSibIsText + r.processString( + w, + v.Text(source), + coalesce) + if v.SoftLineBreak() { + r.doubleSpace(w) + } + } + return ast.WalkContinue, nil + case *ast.Link: + r.processLink(w, v.Destination) + return ast.WalkSkipChildren, nil + case *ast.AutoLink: + r.processLink(w, v.URL(source)) + return ast.WalkSkipChildren, nil + } + return ast.WalkContinue, nil }) - stripper.RenderFooter(&buf, ast) - return buf.Bytes(), stripper.GetLinks() -} - -// RenderNode is the main rendering method. It will be called once for -// every leaf node and twice for every non-leaf node (first with -// entering=true, then with entering=false). The method should write its -// rendition of the node to the supplied writer w. -func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if !entering { - return blackfriday.GoToNext - } - switch node.Type { - case blackfriday.Text: - r.processString(w, node.Literal, node.Parent == nil) - return blackfriday.GoToNext - case blackfriday.Link: - r.processLink(w, node.LinkData.Destination) - r.coallesce = false - return blackfriday.SkipChildren - } - r.coallesce = false - return blackfriday.GoToNext -} - -// RenderHeader is a method that allows the renderer to produce some -// content preceding the main body of the output document. -func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) { } -// RenderFooter is a symmetric counterpart of RenderHeader. -func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) { -} - -func (r *MarkdownStripper) doubleSpace(w io.Writer) { +func (r *stripRenderer) doubleSpace(w io.Writer) { if !r.empty { _, _ = w.Write([]byte{'\n'}) } } -func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) { +func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { // Always break-up words - if !coallesce || !r.coallesce { + if !coalesce { r.doubleSpace(w) } _, _ = w.Write(text) - r.coallesce = coallesce r.empty = false } -func (r *MarkdownStripper) processLink(w io.Writer, link []byte) { +func (r *stripRenderer) processLink(w io.Writer, link []byte) { // Links are processed out of band r.links = append(r.links, string(link)) - r.coallesce = false } // GetLinks returns the list of link data collected while parsing -func (r *MarkdownStripper) GetLinks() []string { +func (r *stripRenderer) GetLinks() []string { return r.links } + +// AddOptions adds given option to this renderer. +func (r *stripRenderer) AddOptions(...renderer.Option) { + // no-op +} + +// StripMarkdown parses markdown content by removing all markup and code blocks +// in order to extract links and other references +func StripMarkdown(rawBytes []byte) (string, []string) { + buf, links := StripMarkdownBytes(rawBytes) + return string(buf), links +} + +var stripParser parser.Parser +var once = sync.Once{} + +// StripMarkdownBytes parses markdown content by removing all markup and code blocks +// in order to extract links and other references +func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { + once.Do(func() { + gdMarkdown := goldmark.New( + goldmark.WithExtensions(extension.Table, + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + common.Linkify, + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + ), + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + ) + stripParser = gdMarkdown.Parser() + }) + stripper := &stripRenderer{ + links: make([]string, 0, 10), + empty: true, + } + reader := text.NewReader(rawBytes) + doc := stripParser.Parse(reader) + var buf bytes.Buffer + if err := stripper.Render(&buf, rawBytes, doc); err != nil { + log.Error("Unable to strip: %v", err) + } + return buf.Bytes(), stripper.GetLinks() +} diff --git a/modules/markup/mdstripper/mdstripper_test.go b/modules/markup/mdstripper/mdstripper_test.go index 157fe1975b..9efcc35949 100644 --- a/modules/markup/mdstripper/mdstripper_test.go +++ b/modules/markup/mdstripper/mdstripper_test.go @@ -53,6 +53,20 @@ A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE. []string{ "link", }}, + { + "Simply closes: #29 yes", + []string{ + "Simply closes: #29 yes", + }, + []string{}, + }, + { + "Simply closes: !29 yes", + []string{ + "Simply closes: !29 yes", + }, + []string{}, + }, } for _, test := range list { -- cgit v1.2.3