From 27757714d0420192e6139d1e4206446dcefe6531 Mon Sep 17 00:00:00 2001 From: zeripath Date: Tue, 31 Dec 2019 01:53:28 +0000 Subject: Change markdown rendering from blackfriday to goldmark (#9533) * Move to goldmark Markdown rendering moved from blackfriday to the goldmark. Multiple subtle changes required to the goldmark extensions to keep current rendering and defaults. Can go further with goldmark linkify and have this work within markdown rendering making the link processor unnecessary. Need to think about how to go about allowing extensions - at present it seems that these would be hard to do without recompilation. * linter fixes Co-authored-by: Lauris BH --- modules/markup/markdown/goldmark.go | 178 ++++++++++++++++++++++++++ modules/markup/markdown/markdown.go | 207 ++++++++++--------------------- modules/markup/markdown/markdown_test.go | 40 ++---- 3 files changed, 252 insertions(+), 173 deletions(-) create mode 100644 modules/markup/markdown/goldmark.go (limited to 'modules/markup/markdown') diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go new file mode 100644 index 0000000000..2a2a9dce6a --- /dev/null +++ b/modules/markup/markdown/goldmark.go @@ -0,0 +1,178 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markdown + +import ( + "bytes" + "fmt" + "strings" + + "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/common" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var byteMailto = []byte("mailto:") + +// GiteaASTTransformer is a default transformer of the goldmark tree. 
+type GiteaASTTransformer struct{} + +// Transform transforms the given AST tree. +func (g *GiteaASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + switch v := n.(type) { + case *ast.Image: + // Images need two things: + // + // 1. Their src needs to be munged to be a real value + // 2. If they're not wrapped with a link they need a link wrapper + + // Check if the destination is a real link + link := v.Destination + if len(link) > 0 && !markup.IsLink(link) { + prefix := pc.Get(urlPrefixKey).(string) + if pc.Get(isWikiKey).(bool) { + prefix = giteautil.URLJoin(prefix, "wiki", "raw") + } + prefix = strings.Replace(prefix, "/src/", "/media/", 1) + + lnk := string(link) + lnk = giteautil.URLJoin(prefix, lnk) + lnk = strings.Replace(lnk, " ", "+", -1) + link = []byte(lnk) + } + v.Destination = link + + parent := n.Parent() + // Create a link around image only if parent is not already a link + if _, ok := parent.(*ast.Link); !ok && parent != nil { + wrap := ast.NewLink() + wrap.Destination = link + wrap.Title = v.Title + parent.ReplaceChild(parent, n, wrap) + wrap.AppendChild(wrap, n) + } + case *ast.Link: + // Links need their href to be munged to be a real value + link := v.Destination + if len(link) > 0 && !markup.IsLink(link) && + link[0] != '#' && !bytes.HasPrefix(link, byteMailto) { + // special case: this is not a link, a hash link or a mailto:, so it's a + // relative URL + lnk := string(link) + if pc.Get(isWikiKey).(bool) { + lnk = giteautil.URLJoin("wiki", lnk) + } + link = []byte(giteautil.URLJoin(pc.Get(urlPrefixKey).(string), lnk)) + } + v.Destination = link + } + return ast.WalkContinue, nil + }) +} + +type prefixedIDs struct { + values map[string]bool +} + +// Generate generates a new element id. 
+func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte { + dft := []byte("id") + if kind == ast.KindHeading { + dft = []byte("heading") + } + return p.GenerateWithDefault(value, dft) +} + +// GenerateWithDefault generates a new element id, using dft when the cleaned value is empty. +func (p *prefixedIDs) GenerateWithDefault(value []byte, dft []byte) []byte { + result := common.CleanValue(value) + if len(result) == 0 { + result = dft + } + if !bytes.HasPrefix(result, []byte("user-content-")) { + result = append([]byte("user-content-"), result...) + } + if _, ok := p.values[util.BytesToReadOnlyString(result)]; !ok { + p.values[util.BytesToReadOnlyString(result)] = true + return result + } + for i := 1; ; i++ { + newResult := fmt.Sprintf("%s-%d", result, i) + if _, ok := p.values[newResult]; !ok { + p.values[newResult] = true + return []byte(newResult) + } + } +} + +// Put puts a given element id to the used ids table. +func (p *prefixedIDs) Put(value []byte) { + p.values[util.BytesToReadOnlyString(value)] = true +} + +func newPrefixedIDs() *prefixedIDs { + return &prefixedIDs{ + values: map[string]bool{}, + } +} + +// NewTaskCheckBoxHTMLRenderer creates a TaskCheckBoxHTMLRenderer to render tasklists +// in the gitea form. +func NewTaskCheckBoxHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &TaskCheckBoxHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// TaskCheckBoxHTMLRenderer is a renderer.NodeRenderer implementation that +// renders checkboxes in list items. +// Overrides the default goldmark one to present the gitea format +type TaskCheckBoxHTMLRenderer struct { + html.Config +} + +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. 
+func (r *TaskCheckBoxHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(east.KindTaskCheckBox, r.renderTaskCheckBox) +} + +func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + n := node.(*east.TaskCheckBox) + + end := ">" + if r.XHTML { + end = " />" + } + var err error + if n.IsChecked { + _, err = w.WriteString(``) + } else { + _, err = w.WriteString(``) + } + if err != nil { + return ast.WalkStop, err + } + return ast.WalkContinue, nil +} diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index f1e44a8fbc..5230fca4dc 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -7,161 +7,83 @@ package markdown import ( "bytes" - "io" - "strings" + "sync" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" - - "github.com/russross/blackfriday/v2" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" ) -// Renderer is a extended version of underlying render object. 
-type Renderer struct { - blackfriday.Renderer - URLPrefix string - IsWiki bool -} +var converter goldmark.Markdown +var once = sync.Once{} -var byteMailto = []byte("mailto:") +var urlPrefixKey = parser.NewContextKey() +var isWikiKey = parser.NewContextKey() -var htmlEscaper = [256][]byte{ - '&': []byte("&"), - '<': []byte("<"), - '>': []byte(">"), - '"': []byte("""), +// NewGiteaParseContext creates a parser.Context with the gitea context set +func NewGiteaParseContext(urlPrefix string, isWiki bool) parser.Context { + pc := parser.NewContext(parser.WithIDs(newPrefixedIDs())) + pc.Set(urlPrefixKey, urlPrefix) + pc.Set(isWikiKey, isWiki) + return pc } -func escapeHTML(w io.Writer, s []byte) { - var start, end int - for end < len(s) { - escSeq := htmlEscaper[s[end]] - if escSeq != nil { - _, _ = w.Write(s[start:end]) - _, _ = w.Write(escSeq) - start = end + 1 - } - end++ - } - if start < len(s) && end <= len(s) { - _, _ = w.Write(s[start:end]) - } -} - -// RenderNode is a default renderer of a single node of a syntax tree. For -// block nodes it will be called twice: first time with entering=true, second -// time with entering=false, so that it could know when it's working on an open -// tag and when on close. It writes the result to w. -// -// The return value is a way to tell the calling walker to adjust its walk -// pattern: e.g. it can terminate the traversal by returning Terminate. Or it -// can ask the walker to skip a subtree of this node by returning SkipChildren. -// The typical behavior is to return GoToNext, which asks for the usual -// traversal to the next node. 
-func (r *Renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - switch node.Type { - case blackfriday.Image: - prefix := r.URLPrefix - if r.IsWiki { - prefix = util.URLJoin(prefix, "wiki", "raw") - } - prefix = strings.Replace(prefix, "/src/", "/media/", 1) - link := node.LinkData.Destination - if len(link) > 0 && !markup.IsLink(link) { - lnk := string(link) - lnk = util.URLJoin(prefix, lnk) - lnk = strings.Replace(lnk, " ", "+", -1) - link = []byte(lnk) - } - node.LinkData.Destination = link - // Render link around image only if parent is not link already - if node.Parent != nil && node.Parent.Type != blackfriday.Link { - if entering { - _, _ = w.Write([]byte(``)) - return r.Renderer.RenderNode(w, node, entering) - } - s := r.Renderer.RenderNode(w, node, entering) - _, _ = w.Write([]byte(``)) - return s - } - return r.Renderer.RenderNode(w, node, entering) - case blackfriday.Link: - // special case: this is not a link, a hash link or a mailto:, so it's a - // relative URL - link := node.LinkData.Destination - if len(link) > 0 && !markup.IsLink(link) && - link[0] != '#' && !bytes.HasPrefix(link, byteMailto) && - node.LinkData.Footnote == nil { - lnk := string(link) - if r.IsWiki { - lnk = util.URLJoin("wiki", lnk) - } - link = []byte(util.URLJoin(r.URLPrefix, lnk)) - } - node.LinkData.Destination = link - return r.Renderer.RenderNode(w, node, entering) - case blackfriday.Text: - isListItem := false - for n := node.Parent; n != nil; n = n.Parent { - if n.Type == blackfriday.Item { - isListItem = true - break - } - } - if isListItem { - text := node.Literal - switch { - case bytes.HasPrefix(text, []byte("[ ] ")): - _, _ = w.Write([]byte(``)) - text = text[3:] - case bytes.HasPrefix(text, []byte("[x] ")): - _, _ = w.Write([]byte(``)) - text = text[3:] - } - node.Literal = text - } - } - return r.Renderer.RenderNode(w, node, entering) -} - -const ( - blackfridayExtensions = 0 | - blackfriday.NoIntraEmphasis | - 
blackfriday.Tables | - blackfriday.FencedCode | - blackfriday.Strikethrough | - blackfriday.NoEmptyLineBeforeBlock | - blackfriday.DefinitionLists | - blackfriday.Footnotes | - blackfriday.HeadingIDs | - blackfriday.AutoHeadingIDs - blackfridayHTMLFlags = 0 | - blackfriday.Smartypants -) - // RenderRaw renders Markdown to HTML without handling special links. func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte { - renderer := &Renderer{ - Renderer: blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{ - Flags: blackfridayHTMLFlags, - FootnoteAnchorPrefix: "user-content-", - HeadingIDPrefix: "user-content-", - }), - URLPrefix: urlPrefix, - IsWiki: wikiMarkdown, - } + once.Do(func() { + converter = goldmark.New( + goldmark.WithExtensions(extension.Table, + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + extension.NewTypographer( + extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ + extension.EnDash: nil, + extension.EmDash: nil, + }), + ), + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + parser.WithASTTransformers( + util.Prioritized(&GiteaASTTransformer{}, 10000), + ), + ), + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + ) + + // Override the original Tasklist renderer! 
+ converter.Renderer().AddOptions( + renderer.WithNodeRenderers( + util.Prioritized(NewTaskCheckBoxHTMLRenderer(), 1000), + ), + ) + + if setting.Markdown.EnableHardLineBreak { + converter.Renderer().AddOptions(html.WithHardWraps()) + } + }) - exts := blackfridayExtensions - if setting.Markdown.EnableHardLineBreak { - exts |= blackfriday.HardLineBreak + pc := NewGiteaParseContext(urlPrefix, wikiMarkdown) + var buf bytes.Buffer + if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil { + log.Error("Unable to render: %v", err) } - // Need to normalize EOL to UNIX LF to have consistent results in rendering - body = blackfriday.Run(util.NormalizeEOL(body), blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts)) - return markup.SanitizeBytes(body) + return markup.SanitizeReader(&buf).Bytes() } var ( @@ -174,8 +96,7 @@ func init() { } // Parser implements markup.Parser -type Parser struct { -} +type Parser struct{} // Name implements markup.Parser func (Parser) Name() string { diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index e3156a657b..53772ee441 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -98,16 +98,12 @@ func TestRender_Images(t *testing.T) { func testAnswers(baseURLContent, baseURLImages string) []string { return []string{ `

Wiki! Enjoy :)

- -

See commit 65f1bf27bc

-

Ideas and codes

- `, `

What is Wine Staging?

-

Wine Staging on website wine-staging.com.

- -

Here are some links to the most important topics. You can find the full list of pages at the sidebar.

- @@ -131,7 +123,6 @@ func testAnswers(baseURLContent, baseURLImages string) []string { - @@ -141,20 +132,15 @@ func testAnswers(baseURLContent, baseURLImages string) []string {
Installation
images/icon-usage.png
`, `

Excelsior JET allows you to create native executables for Windows, Linux and Mac OS X.

-
  1. Package your libGDX application images/1.png
  2. Perform a test run by hitting the Run! button. images/2.png
-

More tests

-

(from https://www.markdownguide.org/extended-syntax/)

-

Definition list

-
First Term
This is the definition of the first term.
@@ -162,27 +148,21 @@ func testAnswers(baseURLContent, baseURLImages string) []string {
This is one definition of the second term.
This is another definition of the second term.
-

Footnotes

-

Here is a simple footnote,1 and here is a longer one.2

-
-
-
    -
  1. This is the first footnote.
  2. - -
  3. Here is one with multiple paragraphs and code.

    - +
  4. +

    This is the first footnote. ↩︎

    +
  5. +
  6. +

    Here is one with multiple paragraphs and code.

    Indent paragraphs to include them in the footnote.

    -

    { my code }

    - -

    Add as many paragraphs as you like.

  7. +

    Add as many paragraphs as you like. ↩︎

    +
-
`, } @@ -299,15 +279,15 @@ func TestRender_RenderParagraphs(t *testing.T) { test := func(t *testing.T, str string, cnt int) { unix := []byte(str) res := string(RenderRaw(unix, "", false)) - assert.Equal(t, strings.Count(res, "