diff options
Diffstat (limited to 'modules')
-rw-r--r-- | modules/markup/common/footnote.go | 507 | ||||
-rw-r--r-- | modules/markup/common/html.go | 19 | ||||
-rw-r--r-- | modules/markup/common/linkify.go | 156 | ||||
-rw-r--r-- | modules/markup/html.go | 15 | ||||
-rw-r--r-- | modules/markup/html_test.go | 4 | ||||
-rw-r--r-- | modules/markup/markdown/goldmark.go | 178 | ||||
-rw-r--r-- | modules/markup/markdown/markdown.go | 207 | ||||
-rw-r--r-- | modules/markup/markdown/markdown_test.go | 40 | ||||
-rw-r--r-- | modules/markup/mdstripper/mdstripper.go | 177 | ||||
-rw-r--r-- | modules/markup/mdstripper/mdstripper_test.go | 14 | ||||
-rw-r--r-- | modules/markup/sanitizer.go | 8 | ||||
-rw-r--r-- | modules/references/references_test.go | 4 |
12 files changed, 1064 insertions, 265 deletions
diff --git a/modules/markup/common/footnote.go b/modules/markup/common/footnote.go new file mode 100644 index 0000000000..ad4cd7f2e1 --- /dev/null +++ b/modules/markup/common/footnote.go @@ -0,0 +1,507 @@ +// Copyright 2019 Yusuke Inuzuka +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// Most of what follows is a subtly changed version of github.com/yuin/goldmark/extension/footnote.go + +package common + +import ( + "bytes" + "fmt" + "os" + "strconv" + "unicode" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// CleanValue will clean a value to make it safe to be an id +// This function is quite different from the original goldmark function +// and more closely matches the output from the shurcooL sanitizer +// In particular Unicode letters and numbers are a lot more than a-zA-Z0-9... +func CleanValue(value []byte) []byte { + value = bytes.TrimSpace(value) + rs := bytes.Runes(value) + result := make([]rune, 0, len(rs)) + needsDash := false + for _, r := range rs { + switch { + case unicode.IsLetter(r) || unicode.IsNumber(r): + if needsDash && len(result) > 0 { + result = append(result, '-') + } + needsDash = false + result = append(result, unicode.ToLower(r)) + default: + needsDash = true + } + } + return []byte(string(result)) +} + +// Most of what follows is a subtly changed version of github.com/yuin/goldmark/extension/footnote.go + +// A FootnoteLink struct represents a link to a footnote of Markdown +// (PHP Markdown Extra) text. +type FootnoteLink struct { + ast.BaseInline + Index int + Name []byte +} + +// Dump implements Node.Dump. +func (n *FootnoteLink) Dump(source []byte, level int) { + m := map[string]string{} + m["Index"] = fmt.Sprintf("%v", n.Index) + m["Name"] = fmt.Sprintf("%v", n.Name) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnoteLink is a NodeKind of the FootnoteLink node. +var KindFootnoteLink = ast.NewNodeKind("GiteaFootnoteLink") + +// Kind implements Node.Kind. +func (n *FootnoteLink) Kind() ast.NodeKind { + return KindFootnoteLink +} + +// NewFootnoteLink returns a new FootnoteLink node. +func NewFootnoteLink(index int, name []byte) *FootnoteLink { + return &FootnoteLink{ + Index: index, + Name: name, + } +} + +// A FootnoteBackLink struct represents a link to a footnote of Markdown +// (PHP Markdown Extra) text. +type FootnoteBackLink struct { + ast.BaseInline + Index int + Name []byte +} + +// Dump implements Node.Dump. +func (n *FootnoteBackLink) Dump(source []byte, level int) { + m := map[string]string{} + m["Index"] = fmt.Sprintf("%v", n.Index) + m["Name"] = fmt.Sprintf("%v", n.Name) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnoteBackLink is a NodeKind of the FootnoteBackLink node. +var KindFootnoteBackLink = ast.NewNodeKind("GiteaFootnoteBackLink") + +// Kind implements Node.Kind. +func (n *FootnoteBackLink) Kind() ast.NodeKind { + return KindFootnoteBackLink +} + +// NewFootnoteBackLink returns a new FootnoteBackLink node. +func NewFootnoteBackLink(index int, name []byte) *FootnoteBackLink { + return &FootnoteBackLink{ + Index: index, + Name: name, + } +} + +// A Footnote struct represents a footnote of Markdown +// (PHP Markdown Extra) text. +type Footnote struct { + ast.BaseBlock + Ref []byte + Index int + Name []byte +} + +// Dump implements Node.Dump. +func (n *Footnote) Dump(source []byte, level int) { + m := map[string]string{} + m["Index"] = fmt.Sprintf("%v", n.Index) + m["Ref"] = fmt.Sprintf("%s", n.Ref) + m["Name"] = fmt.Sprintf("%v", n.Name) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnote is a NodeKind of the Footnote node. +var KindFootnote = ast.NewNodeKind("GiteaFootnote") + +// Kind implements Node.Kind. +func (n *Footnote) Kind() ast.NodeKind { + return KindFootnote +} + +// NewFootnote returns a new Footnote node. +func NewFootnote(ref []byte) *Footnote { + return &Footnote{ + Ref: ref, + Index: -1, + Name: ref, + } +} + +// A FootnoteList struct represents footnotes of Markdown +// (PHP Markdown Extra) text. +type FootnoteList struct { + ast.BaseBlock + Count int +} + +// Dump implements Node.Dump. +func (n *FootnoteList) Dump(source []byte, level int) { + m := map[string]string{} + m["Count"] = fmt.Sprintf("%v", n.Count) + ast.DumpHelper(n, source, level, m, nil) +} + +// KindFootnoteList is a NodeKind of the FootnoteList node. +var KindFootnoteList = ast.NewNodeKind("GiteaFootnoteList") + +// Kind implements Node.Kind. +func (n *FootnoteList) Kind() ast.NodeKind { + return KindFootnoteList +} + +// NewFootnoteList returns a new FootnoteList node. +func NewFootnoteList() *FootnoteList { + return &FootnoteList{ + Count: 0, + } +} + +var footnoteListKey = parser.NewContextKey() + +type footnoteBlockParser struct { +} + +var defaultFootnoteBlockParser = &footnoteBlockParser{} + +// NewFootnoteBlockParser returns a new parser.BlockParser that can parse +// footnotes of the Markdown(PHP Markdown Extra) text. +func NewFootnoteBlockParser() parser.BlockParser { + return defaultFootnoteBlockParser +} + +func (b *footnoteBlockParser) Trigger() []byte { + return []byte{'['} +} + +func (b *footnoteBlockParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) { + line, segment := reader.PeekLine() + pos := pc.BlockOffset() + if pos < 0 || line[pos] != '[' { + return nil, parser.NoChildren + } + pos++ + if pos > len(line)-1 || line[pos] != '^' { + return nil, parser.NoChildren + } + open := pos + 1 + closes := 0 + closure := util.FindClosure(line[pos+1:], '[', ']', false, false) + closes = pos + 1 + closure + next := closes + 1 + if closure > -1 { + if next >= len(line) || line[next] != ':' { + return nil, parser.NoChildren + } + } else { + return nil, parser.NoChildren + } + padding := segment.Padding + label := reader.Value(text.NewSegment(segment.Start+open-padding, segment.Start+closes-padding)) + if util.IsBlank(label) { + return nil, parser.NoChildren + } + item := NewFootnote(label) + + pos = next + 1 - padding + if pos >= len(line) { + reader.Advance(pos) + return item, parser.NoChildren + } + reader.AdvanceAndSetPadding(pos, padding) + return item, parser.HasChildren +} + +func (b *footnoteBlockParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State { + line, _ := reader.PeekLine() + if util.IsBlank(line) { + return parser.Continue | parser.HasChildren + } + childpos, padding := util.IndentPosition(line, reader.LineOffset(), 4) + if childpos < 0 { + return parser.Close + } + reader.AdvanceAndSetPadding(childpos, padding) + return parser.Continue | parser.HasChildren +} + +func (b *footnoteBlockParser) Close(node ast.Node, reader text.Reader, pc parser.Context) { + var list *FootnoteList + if tlist := pc.Get(footnoteListKey); tlist != nil { + list = tlist.(*FootnoteList) + } else { + list = NewFootnoteList() + pc.Set(footnoteListKey, list) + node.Parent().InsertBefore(node.Parent(), node, list) + } + node.Parent().RemoveChild(node.Parent(), node) + list.AppendChild(list, node) +} + +func (b *footnoteBlockParser) CanInterruptParagraph() bool { + return true +} + +func (b *footnoteBlockParser) CanAcceptIndentedLine() bool { + return false +} + +type footnoteParser struct { +} + +var defaultFootnoteParser = &footnoteParser{} + +// NewFootnoteParser returns a new parser.InlineParser that can parse +// footnote links of the Markdown(PHP Markdown Extra) text. +func NewFootnoteParser() parser.InlineParser { + return defaultFootnoteParser +} + +func (s *footnoteParser) Trigger() []byte { + // footnote syntax probably conflict with the image syntax. + // So we need trigger this parser with '!'. + return []byte{'!', '['} +} + +func (s *footnoteParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + line, segment := block.PeekLine() + pos := 1 + if len(line) > 0 && line[0] == '!' { + pos++ + } + if pos >= len(line) || line[pos] != '^' { + return nil + } + pos++ + if pos >= len(line) { + return nil + } + open := pos + closure := util.FindClosure(line[pos:], '[', ']', false, false) + if closure < 0 { + return nil + } + closes := pos + closure + value := block.Value(text.NewSegment(segment.Start+open, segment.Start+closes)) + block.Advance(closes + 1) + + var list *FootnoteList + if tlist := pc.Get(footnoteListKey); tlist != nil { + list = tlist.(*FootnoteList) + } + if list == nil { + return nil + } + index := 0 + name := []byte{} + for def := list.FirstChild(); def != nil; def = def.NextSibling() { + d := def.(*Footnote) + if bytes.Equal(d.Ref, value) { + if d.Index < 0 { + list.Count++ + d.Index = list.Count + val := CleanValue(d.Name) + if len(val) == 0 { + val = []byte(strconv.Itoa(d.Index)) + } + d.Name = pc.IDs().Generate(val, KindFootnote) + } + index = d.Index + name = d.Name + break + } + } + if index == 0 { + return nil + } + + return NewFootnoteLink(index, name) +} + +type footnoteASTTransformer struct { +} + +var defaultFootnoteASTTransformer = &footnoteASTTransformer{} + +// NewFootnoteASTTransformer returns a new parser.ASTTransformer that +// insert a footnote list to the last of the document. +func NewFootnoteASTTransformer() parser.ASTTransformer { + return defaultFootnoteASTTransformer +} + +func (a *footnoteASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + var list *FootnoteList + if tlist := pc.Get(footnoteListKey); tlist != nil { + list = tlist.(*FootnoteList) + } else { + return + } + pc.Set(footnoteListKey, nil) + for footnote := list.FirstChild(); footnote != nil; { + var container ast.Node = footnote + next := footnote.NextSibling() + if fc := container.LastChild(); fc != nil && ast.IsParagraph(fc) { + container = fc + } + footnoteNode := footnote.(*Footnote) + index := footnoteNode.Index + name := footnoteNode.Name + if index < 0 { + list.RemoveChild(list, footnote) + } else { + container.AppendChild(container, NewFootnoteBackLink(index, name)) + } + footnote = next + } + list.SortChildren(func(n1, n2 ast.Node) int { + if n1.(*Footnote).Index < n2.(*Footnote).Index { + return -1 + } + return 1 + }) + if list.Count <= 0 { + list.Parent().RemoveChild(list.Parent(), list) + return + } + + node.AppendChild(node, list) +} + +// FootnoteHTMLRenderer is a renderer.NodeRenderer implementation that +// renders FootnoteLink nodes. +type FootnoteHTMLRenderer struct { + html.Config +} + +// NewFootnoteHTMLRenderer returns a new FootnoteHTMLRenderer. +func NewFootnoteHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &FootnoteHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *FootnoteHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(KindFootnoteLink, r.renderFootnoteLink) + reg.Register(KindFootnoteBackLink, r.renderFootnoteBackLink) + reg.Register(KindFootnote, r.renderFootnote) + reg.Register(KindFootnoteList, r.renderFootnoteList) +} + +func (r *FootnoteHTMLRenderer) renderFootnoteLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + n := node.(*FootnoteLink) + n.Dump(source, 0) + is := strconv.Itoa(n.Index) + _, _ = w.WriteString(`<sup id="fnref:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`"><a href="#fn:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`" class="footnote-ref" role="doc-noteref">`) + _, _ = w.WriteString(is) + _, _ = w.WriteString(`</a></sup>`) + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnoteBackLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + n := node.(*FootnoteBackLink) + fmt.Fprintf(os.Stdout, "source:\n%s\n", string(n.Text(source))) + _, _ = w.WriteString(` <a href="#fnref:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`" class="footnote-backref" role="doc-backlink">`) + _, _ = w.WriteString("↩︎") + _, _ = w.WriteString(`</a>`) + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnote(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*Footnote) + if entering { + fmt.Fprintf(os.Stdout, "source:\n%s\n", string(n.Text(source))) + _, _ = w.WriteString(`<li id="fn:`) + _, _ = w.Write(n.Name) + _, _ = w.WriteString(`" role="doc-endnote"`) + if node.Attributes() != nil { + html.RenderAttributes(w, node, html.ListItemAttributeFilter) + } + _, _ = w.WriteString(">\n") + } else { + _, _ = w.WriteString("</li>\n") + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnoteList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + tag := "div" + if entering { + _, _ = w.WriteString("<") + _, _ = w.WriteString(tag) + _, _ = w.WriteString(` class="footnotes" role="doc-endnotes"`) + if node.Attributes() != nil { + html.RenderAttributes(w, node, html.GlobalAttributeFilter) + } + _ = w.WriteByte('>') + if r.Config.XHTML { + _, _ = w.WriteString("\n<hr />\n") + } else { + _, _ = w.WriteString("\n<hr>\n") + } + _, _ = w.WriteString("<ol>\n") + } else { + _, _ = w.WriteString("</ol>\n") + _, _ = w.WriteString("</") + _, _ = w.WriteString(tag) + _, _ = w.WriteString(">\n") + } + return ast.WalkContinue, nil +} + +type footnoteExtension struct{} + +// FootnoteExtension represents the Gitea Footnote +var FootnoteExtension = &footnoteExtension{} + +// Extend extends the markdown converter with the Gitea Footnote parser +func (e *footnoteExtension) Extend(m goldmark.Markdown) { + m.Parser().AddOptions( + parser.WithBlockParsers( + util.Prioritized(NewFootnoteBlockParser(), 999), + ), + parser.WithInlineParsers( + util.Prioritized(NewFootnoteParser(), 101), + ), + parser.WithASTTransformers( + util.Prioritized(NewFootnoteASTTransformer(), 999), + ), + ) + m.Renderer().AddOptions(renderer.WithNodeRenderers( + util.Prioritized(NewFootnoteHTMLRenderer(), 500), + )) +} diff --git a/modules/markup/common/html.go b/modules/markup/common/html.go new file mode 100644 index 0000000000..3a47686f1e --- /dev/null +++ b/modules/markup/common/html.go @@ -0,0 +1,19 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package common + +import ( + "mvdan.cc/xurls/v2" +) + +var ( + // NOTE: All below regex matching do not perform any extra validation. + // Thus a link is produced even if the linked entity does not exist. + // While fast, this is also incorrect and lead to false positives. + // TODO: fix invalid linking issue + + // LinkRegex is a regexp matching a valid link + LinkRegex, _ = xurls.StrictMatchingScheme("https?://") +) diff --git a/modules/markup/common/linkify.go b/modules/markup/common/linkify.go new file mode 100644 index 0000000000..6ae70fba34 --- /dev/null +++ b/modules/markup/common/linkify.go @@ -0,0 +1,156 @@ +// Copyright 2019 Yusuke Inuzuka +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +// Most of this file is a subtly changed version of github.com/yuin/goldmark/extension/linkify.go + +package common + +import ( + "bytes" + "regexp" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) + +type linkifyParser struct { +} + +var defaultLinkifyParser = &linkifyParser{} + +// NewLinkifyParser return a new InlineParser can parse +// text that seems like a URL. +func NewLinkifyParser() parser.InlineParser { + return defaultLinkifyParser +} + +func (s *linkifyParser) Trigger() []byte { + // ' ' indicates any white spaces and a line head + return []byte{' ', '*', '_', '~', '('} +} + +var protoHTTP = []byte("http:") +var protoHTTPS = []byte("https:") +var protoFTP = []byte("ftp:") +var domainWWW = []byte("www.") + +func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + if pc.IsInLinkLabel() { + return nil + } + line, segment := block.PeekLine() + consumes := 0 + start := segment.Start + c := line[0] + // advance if current position is not a line head. + if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' { + consumes++ + start++ + line = line[1:] + } + + var m []int + var protocol []byte + var typ ast.AutoLinkType = ast.AutoLinkURL + if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { + m = LinkRegex.FindSubmatchIndex(line) + } + if m == nil && bytes.HasPrefix(line, domainWWW) { + m = wwwURLRegxp.FindSubmatchIndex(line) + protocol = []byte("http") + } + if m != nil { + lastChar := line[m[1]-1] + if lastChar == '.' { + m[1]-- + } else if lastChar == ')' { + closing := 0 + for i := m[1] - 1; i >= m[0]; i-- { + if line[i] == ')' { + closing++ + } else if line[i] == '(' { + closing-- + } + } + if closing > 0 { + m[1] -= closing + } + } else if lastChar == ';' { + i := m[1] - 2 + for ; i >= m[0]; i-- { + if util.IsAlphaNumeric(line[i]) { + continue + } + break + } + if i != m[1]-2 { + if line[i] == '&' { + m[1] -= m[1] - i + } + } + } + } + if m == nil { + if len(line) > 0 && util.IsPunct(line[0]) { + return nil + } + typ = ast.AutoLinkEmail + stop := util.FindEmailIndex(line) + if stop < 0 { + return nil + } + at := bytes.IndexByte(line, '@') + m = []int{0, stop, at, stop - 1} + if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 { + return nil + } + lastChar := line[m[1]-1] + if lastChar == '.' { + m[1]-- + } + if m[1] < len(line) { + nextChar := line[m[1]] + if nextChar == '-' || nextChar == '_' { + return nil + } + } + } + if m == nil { + return nil + } + if consumes != 0 { + s := segment.WithStop(segment.Start + 1) + ast.MergeOrAppendTextSegment(parent, s) + } + consumes += m[1] + block.Advance(consumes) + n := ast.NewTextSegment(text.NewSegment(start, start+m[1])) + link := ast.NewAutoLink(typ, n) + link.Protocol = protocol + return link +} + +func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { + // nothing to do +} + +type linkify struct { +} + +// Linkify is an extension that allow you to parse text that seems like a URL. +var Linkify = &linkify{} + +func (e *linkify) Extend(m goldmark.Markdown) { + m.Parser().AddOptions( + parser.WithInlineParsers( + util.Prioritized(NewLinkifyParser(), 999), + ), + ) +} diff --git a/modules/markup/html.go b/modules/markup/html.go index b10da40fc1..2c6773bce4 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -15,6 +15,7 @@ import ( "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/references" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" @@ -57,8 +58,6 @@ var ( // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))") - linkRegex, _ = xurls.StrictMatchingScheme("https?://") - // blackfriday extensions create IDs like fn:user-content-footnote blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) ) @@ -118,7 +117,7 @@ func CustomLinkURLSchemes(schemes []string) { } withAuth = append(withAuth, s) } - linkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) + common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) } // IsSameDomain checks if given url string has the same hostname as current Gitea instance @@ -509,6 +508,12 @@ func shortLinkProcessorFull(ctx *postProcessCtx, node *html.Node, noLink bool) { (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { const lenQuote = len("‘") val = val[lenQuote : len(val)-lenQuote] + } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || + (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { + val = val[1 : len(val)-1] + } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { + const lenQuote = len("‘") + val = val[1 : len(val)-lenQuote] } props[key] = val } @@ -803,7 +808,7 @@ func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) { // linkProcessor creates links for any HTTP or HTTPS URL not captured by // markdown. func linkProcessor(ctx *postProcessCtx, node *html.Node) { - m := linkRegex.FindStringIndex(node.Data) + m := common.LinkRegex.FindStringIndex(node.Data) if m == nil { return } @@ -832,7 +837,7 @@ func genDefaultLinkProcessor(defaultLink string) processor { // descriptionLinkProcessor creates links for DescriptionHTML func descriptionLinkProcessor(ctx *postProcessCtx, node *html.Node) { - m := linkRegex.FindStringIndex(node.Data) + m := common.LinkRegex.FindStringIndex(node.Data) if m == nil { return } diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 07747e97e1..91ef320b40 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -323,6 +323,6 @@ func TestRender_ShortLinks(t *testing.T) { `<p><a href="`+notencodedImgurlWiki+`" rel="nofollow"><img src="`+notencodedImgurlWiki+`"/></a></p>`) test( "<p><a href=\"https://example.org\">[[foobar]]</a></p>", - `<p></p><p><a href="https://example.org" rel="nofollow">[[foobar]]</a></p><p></p>`, - `<p></p><p><a href="https://example.org" rel="nofollow">[[foobar]]</a></p><p></p>`) + `<p><a href="https://example.org" rel="nofollow">[[foobar]]</a></p>`, + `<p><a href="https://example.org" rel="nofollow">[[foobar]]</a></p>`) } diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go new file mode 100644 index 0000000000..2a2a9dce6a --- /dev/null +++ b/modules/markup/markdown/goldmark.go @@ -0,0 +1,178 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markdown + +import ( + "bytes" + "fmt" + "strings" + + "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/common" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var byteMailto = []byte("mailto:") + +// GiteaASTTransformer is a default transformer of the goldmark tree. +type GiteaASTTransformer struct{} + +// Transform transforms the given AST tree. +func (g *GiteaASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + switch v := n.(type) { + case *ast.Image: + // Images need two things: + // + // 1. Their src needs to munged to be a real value + // 2. If they're not wrapped with a link they need a link wrapper + + // Check if the destination is a real link + link := v.Destination + if len(link) > 0 && !markup.IsLink(link) { + prefix := pc.Get(urlPrefixKey).(string) + if pc.Get(isWikiKey).(bool) { + prefix = giteautil.URLJoin(prefix, "wiki", "raw") + } + prefix = strings.Replace(prefix, "/src/", "/media/", 1) + + lnk := string(link) + lnk = giteautil.URLJoin(prefix, lnk) + lnk = strings.Replace(lnk, " ", "+", -1) + link = []byte(lnk) + } + v.Destination = link + + parent := n.Parent() + // Create a link around image only if parent is not already a link + if _, ok := parent.(*ast.Link); !ok && parent != nil { + wrap := ast.NewLink() + wrap.Destination = link + wrap.Title = v.Title + parent.ReplaceChild(parent, n, wrap) + wrap.AppendChild(wrap, n) + } + case *ast.Link: + // Links need their href to munged to be a real value + link := v.Destination + if len(link) > 0 && !markup.IsLink(link) && + link[0] != '#' && !bytes.HasPrefix(link, byteMailto) { + // special case: this is not a link, a hash link or a mailto:, so it's a + // relative URL + lnk := string(link) + if pc.Get(isWikiKey).(bool) { + lnk = giteautil.URLJoin("wiki", lnk) + } + link = []byte(giteautil.URLJoin(pc.Get(urlPrefixKey).(string), lnk)) + } + v.Destination = link + } + return ast.WalkContinue, nil + }) +} + +type prefixedIDs struct { + values map[string]bool +} + +// Generate generates a new element id. +func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte { + dft := []byte("id") + if kind == ast.KindHeading { + dft = []byte("heading") + } + return p.GenerateWithDefault(value, dft) +} + +// Generate generates a new element id. +func (p *prefixedIDs) GenerateWithDefault(value []byte, dft []byte) []byte { + result := common.CleanValue(value) + if len(result) == 0 { + result = dft + } + if !bytes.HasPrefix(result, []byte("user-content-")) { + result = append([]byte("user-content-"), result...) + } + if _, ok := p.values[util.BytesToReadOnlyString(result)]; !ok { + p.values[util.BytesToReadOnlyString(result)] = true + return result + } + for i := 1; ; i++ { + newResult := fmt.Sprintf("%s-%d", result, i) + if _, ok := p.values[newResult]; !ok { + p.values[newResult] = true + return []byte(newResult) + } + } +} + +// Put puts a given element id to the used ids table. +func (p *prefixedIDs) Put(value []byte) { + p.values[util.BytesToReadOnlyString(value)] = true +} + +func newPrefixedIDs() *prefixedIDs { + return &prefixedIDs{ + values: map[string]bool{}, + } +} + +// NewTaskCheckBoxHTMLRenderer creates a TaskCheckBoxHTMLRenderer to render tasklists +// in the gitea form. +func NewTaskCheckBoxHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &TaskCheckBoxHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// TaskCheckBoxHTMLRenderer is a renderer.NodeRenderer implementation that +// renders checkboxes in list items. +// Overrides the default goldmark one to present the gitea format +type TaskCheckBoxHTMLRenderer struct { + html.Config +} + +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. +func (r *TaskCheckBoxHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(east.KindTaskCheckBox, r.renderTaskCheckBox) +} + +func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + n := node.(*east.TaskCheckBox) + + end := ">" + if r.XHTML { + end = " />" + } + var err error + if n.IsChecked { + _, err = w.WriteString(`<span class="ui fitted disabled checkbox"><input type="checkbox" disabled="disabled"` + end + `<label` + end + `</span>`) + } else { + _, err = w.WriteString(`<span class="ui checked fitted disabled checkbox"><input type="checkbox" checked="" disabled="disabled"` + end + `<label` + end + `</span>`) + } + if err != nil { + return ast.WalkStop, err + } + return ast.WalkContinue, nil +} diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index f1e44a8fbc..5230fca4dc 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -7,161 +7,83 @@ package markdown import ( "bytes" - "io" - "strings" + "sync" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" - - "github.com/russross/blackfriday/v2" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" ) -// Renderer is a extended version of underlying render object. -type Renderer struct { - blackfriday.Renderer - URLPrefix string - IsWiki bool -} +var converter goldmark.Markdown +var once = sync.Once{} -var byteMailto = []byte("mailto:") +var urlPrefixKey = parser.NewContextKey() +var isWikiKey = parser.NewContextKey() -var htmlEscaper = [256][]byte{ - '&': []byte("&"), - '<': []byte("<"), - '>': []byte(">"), - '"': []byte("""), +// NewGiteaParseContext creates a parser.Context with the gitea context set +func NewGiteaParseContext(urlPrefix string, isWiki bool) parser.Context { + pc := parser.NewContext(parser.WithIDs(newPrefixedIDs())) + pc.Set(urlPrefixKey, urlPrefix) + pc.Set(isWikiKey, isWiki) + return pc } -func escapeHTML(w io.Writer, s []byte) { - var start, end int - for end < len(s) { - escSeq := htmlEscaper[s[end]] - if escSeq != nil { - _, _ = w.Write(s[start:end]) - _, _ = w.Write(escSeq) - start = end + 1 - } - end++ - } - if start < len(s) && end <= len(s) { - _, _ = w.Write(s[start:end]) - } -} - -// RenderNode is a default renderer of a single node of a syntax tree. For -// block nodes it will be called twice: first time with entering=true, second -// time with entering=false, so that it could know when it's working on an open -// tag and when on close. It writes the result to w. -// -// The return value is a way to tell the calling walker to adjust its walk -// pattern: e.g. it can terminate the traversal by returning Terminate. Or it -// can ask the walker to skip a subtree of this node by returning SkipChildren. -// The typical behavior is to return GoToNext, which asks for the usual -// traversal to the next node. -func (r *Renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - switch node.Type { - case blackfriday.Image: - prefix := r.URLPrefix - if r.IsWiki { - prefix = util.URLJoin(prefix, "wiki", "raw") - } - prefix = strings.Replace(prefix, "/src/", "/media/", 1) - link := node.LinkData.Destination - if len(link) > 0 && !markup.IsLink(link) { - lnk := string(link) - lnk = util.URLJoin(prefix, lnk) - lnk = strings.Replace(lnk, " ", "+", -1) - link = []byte(lnk) - } - node.LinkData.Destination = link - // Render link around image only if parent is not link already - if node.Parent != nil && node.Parent.Type != blackfriday.Link { - if entering { - _, _ = w.Write([]byte(`<a href="`)) - escapeHTML(w, link) - _, _ = w.Write([]byte(`">`)) - return r.Renderer.RenderNode(w, node, entering) - } - s := r.Renderer.RenderNode(w, node, entering) - _, _ = w.Write([]byte(`</a>`)) - return s - } - return r.Renderer.RenderNode(w, node, entering) - case blackfriday.Link: - // special case: this is not a link, a hash link or a mailto:, so it's a - // relative URL - link := node.LinkData.Destination - if len(link) > 0 && !markup.IsLink(link) && - link[0] != '#' && !bytes.HasPrefix(link, byteMailto) && - node.LinkData.Footnote == nil { - lnk := string(link) - if r.IsWiki { - lnk = util.URLJoin("wiki", lnk) - } - link = []byte(util.URLJoin(r.URLPrefix, lnk)) - } - node.LinkData.Destination = link - return r.Renderer.RenderNode(w, node, entering) - case blackfriday.Text: - isListItem := false - for n := node.Parent; n != nil; n = n.Parent { - if n.Type == blackfriday.Item { - isListItem = true - break - } - } - if isListItem { - text := node.Literal - switch { - case bytes.HasPrefix(text, []byte("[ ] ")): - _, _ = w.Write([]byte(`<span class="ui fitted disabled checkbox"><input type="checkbox" disabled="disabled" /><label /></span>`)) - text = text[3:] - case bytes.HasPrefix(text, []byte("[x] ")): - _, _ = w.Write([]byte(`<span class="ui checked fitted disabled checkbox"><input type="checkbox" checked="" disabled="disabled" /><label /></span>`)) - text = text[3:] - } - node.Literal = text - } - } - return r.Renderer.RenderNode(w, node, entering) -} - -const ( - blackfridayExtensions = 0 | - blackfriday.NoIntraEmphasis | - blackfriday.Tables | - blackfriday.FencedCode | - blackfriday.Strikethrough | - blackfriday.NoEmptyLineBeforeBlock | - blackfriday.DefinitionLists | - blackfriday.Footnotes | - blackfriday.HeadingIDs | - blackfriday.AutoHeadingIDs - blackfridayHTMLFlags = 0 | - blackfriday.Smartypants -) - // RenderRaw renders Markdown to HTML without handling special links. func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte { - renderer := &Renderer{ - Renderer: blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{ - Flags: blackfridayHTMLFlags, - FootnoteAnchorPrefix: "user-content-", - HeadingIDPrefix: "user-content-", - }), - URLPrefix: urlPrefix, - IsWiki: wikiMarkdown, - } + once.Do(func() { + converter = goldmark.New( + goldmark.WithExtensions(extension.Table, + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + extension.NewTypographer( + extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ + extension.EnDash: nil, + extension.EmDash: nil, + }), + ), + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + parser.WithASTTransformers( + util.Prioritized(&GiteaASTTransformer{}, 10000), + ), + ), + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + ) + + // Override the original Tasklist renderer! + converter.Renderer().AddOptions( + renderer.WithNodeRenderers( + util.Prioritized(NewTaskCheckBoxHTMLRenderer(), 1000), + ), + ) + + if setting.Markdown.EnableHardLineBreak { + converter.Renderer().AddOptions(html.WithHardWraps()) + } + }) - exts := blackfridayExtensions - if setting.Markdown.EnableHardLineBreak { - exts |= blackfriday.HardLineBreak + pc := NewGiteaParseContext(urlPrefix, wikiMarkdown) + var buf bytes.Buffer + if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil { + log.Error("Unable to render: %v", err) } - // Need to normalize EOL to UNIX LF to have consistent results in rendering - body = blackfriday.Run(util.NormalizeEOL(body), blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts)) - return markup.SanitizeBytes(body) + return markup.SanitizeReader(&buf).Bytes() } var ( @@ -174,8 +96,7 @@ func init() { } // Parser implements markup.Parser -type Parser struct { -} +type Parser struct{} // Name implements markup.Parser func (Parser) Name() string { diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index e3156a657b..53772ee441 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -98,16 +98,12 @@ func TestRender_Images(t *testing.T) { func testAnswers(baseURLContent, baseURLImages string) []string { return []string{ `<p>Wiki! Enjoy :)</p> - <ul> <li><a href="` + baseURLContent + `/Links" rel="nofollow">Links, Language bindings, Engine bindings</a></li> <li><a href="` + baseURLContent + `/Tips" rel="nofollow">Tips</a></li> </ul> - <p>See commit <a href="http://localhost:3000/gogits/gogs/commit/65f1bf27bc" rel="nofollow"><code>65f1bf27bc</code></a></p> - <p>Ideas and codes</p> - <ul> <li>Bezier widget (by <a href="` + AppURL + `r-lyeh" rel="nofollow">@r-lyeh</a>) <a href="http://localhost:3000/ocornut/imgui/issues/786" rel="nofollow">ocornut/imgui#786</a></li> <li>Bezier widget (by <a href="` + AppURL + `r-lyeh" rel="nofollow">@r-lyeh</a>) <a href="http://localhost:3000/gogits/gogs/issues/786" rel="nofollow">#786</a></li> @@ -117,13 +113,9 @@ func testAnswers(baseURLContent, baseURLImages string) []string { </ul> `, `<h2 id="user-content-what-is-wine-staging">What is Wine Staging?</h2> - <p><strong>Wine Staging</strong> on website <a href="http://wine-staging.com" rel="nofollow">wine-staging.com</a>.</p> - <h2 id="user-content-quick-links">Quick Links</h2> - <p>Here are some links to the most important topics. You can find the full list of pages at the sidebar.</p> - <table> <thead> <tr> @@ -131,7 +123,6 @@ func testAnswers(baseURLContent, baseURLImages string) []string { <th><a href="` + baseURLContent + `/Installation" rel="nofollow">Installation</a></th> </tr> </thead> - <tbody> <tr> <td><a href="` + baseURLImages + `/images/icon-usage.png" rel="nofollow"><img src="` + baseURLImages + `/images/icon-usage.png" title="icon-usage.png" alt="images/icon-usage.png"/></a></td> @@ -141,20 +132,15 @@ func testAnswers(baseURLContent, baseURLImages string) []string { </table> `, `<p><a href="http://www.excelsiorjet.com/" rel="nofollow">Excelsior JET</a> allows you to create native executables for Windows, Linux and Mac OS X.</p> - <ol> <li><a href="https://github.com/libgdx/libgdx/wiki/Gradle-on-the-Commandline#packaging-for-the-desktop" rel="nofollow">Package your libGDX application</a> <a href="` + baseURLImages + `/images/1.png" rel="nofollow"><img src="` + baseURLImages + `/images/1.png" title="1.png" alt="images/1.png"/></a></li> <li>Perform a test run by hitting the Run! button. <a href="` + baseURLImages + `/images/2.png" rel="nofollow"><img src="` + baseURLImages + `/images/2.png" title="2.png" alt="images/2.png"/></a></li> </ol> - <h2 id="user-content-custom-id">More tests</h2> - <p>(from <a href="https://www.markdownguide.org/extended-syntax/" rel="nofollow">https://www.markdownguide.org/extended-syntax/</a>)</p> - <h3 id="user-content-definition-list">Definition list</h3> - <dl> <dt>First Term</dt> <dd>This is the definition of the first term.</dd> @@ -162,27 +148,21 @@ func testAnswers(baseURLContent, baseURLImages string) []string { <dd>This is one definition of the second term.</dd> <dd>This is another definition of the second term.</dd> </dl> - <h3 id="user-content-footnotes">Footnotes</h3> - <p>Here is a simple footnote,<sup id="fnref:user-content-1"><a href="#fn:user-content-1" rel="nofollow">1</a></sup> and here is a longer one.<sup id="fnref:user-content-bignote"><a href="#fn:user-content-bignote" rel="nofollow">2</a></sup></p> - <div> - <hr/> - <ol> -<li id="fn:user-content-1">This is the first footnote.</li> - -<li id="fn:user-content-bignote"><p>Here is one with multiple paragraphs and code.</p> - +<li id="fn:user-content-1"> +<p>This is the first footnote. <a href="#fnref:user-content-1" rel="nofollow">↩︎</a></p> +</li> +<li id="fn:user-content-bignote"> +<p>Here is one with multiple paragraphs and code.</p> <p>Indent paragraphs to include them in the footnote.</p> - <p><code>{ my code }</code></p> - -<p>Add as many paragraphs as you like.</p></li> +<p>Add as many paragraphs as you like. <a href="#fnref:user-content-bignote" rel="nofollow">↩︎</a></p> +</li> </ol> - </div> `, } @@ -299,15 +279,15 @@ func TestRender_RenderParagraphs(t *testing.T) { test := func(t *testing.T, str string, cnt int) { unix := []byte(str) res := string(RenderRaw(unix, "", false)) - assert.Equal(t, strings.Count(res, "<p"), cnt) + assert.Equal(t, strings.Count(res, "<p"), cnt, "Rendered result for unix should have %d paragraph(s) but has %d:\n%s\n", cnt, strings.Count(res, "<p"), res) mac := []byte(strings.ReplaceAll(str, "\n", "\r")) res = string(RenderRaw(mac, "", false)) - assert.Equal(t, strings.Count(res, "<p"), cnt) + assert.Equal(t, strings.Count(res, "<p"), cnt, "Rendered result for mac should have %d paragraph(s) but has %d:\n%s\n", cnt, strings.Count(res, "<p"), res) dos := []byte(strings.ReplaceAll(str, "\n", "\r\n")) res = string(RenderRaw(dos, "", false)) - assert.Equal(t, strings.Count(res, "<p"), cnt) + assert.Equal(t, strings.Count(res, "<p"), cnt, "Rendered result for windows should have %d paragraph(s) but has %d:\n%s\n", cnt, strings.Count(res, "<p"), res) } test(t, "\nOne\nTwo\nThree", 1) diff --git a/modules/markup/mdstripper/mdstripper.go b/modules/markup/mdstripper/mdstripper.go index d248944b68..9d05ee3969 100644 --- a/modules/markup/mdstripper/mdstripper.go +++ b/modules/markup/mdstripper/mdstripper.go @@ -6,113 +6,128 @@ package mdstripper import ( "bytes" - "io" + "sync" - "github.com/russross/blackfriday/v2" -) + "io" -// MarkdownStripper extends blackfriday.Renderer -type MarkdownStripper struct { - links []string - coallesce bool - empty bool -} + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/markup/common" -const ( - blackfridayExtensions = 0 | - blackfriday.NoIntraEmphasis | - blackfriday.Tables | - blackfriday.FencedCode | - blackfriday.Strikethrough | - blackfriday.NoEmptyLineBeforeBlock | - blackfriday.DefinitionLists | - blackfriday.Footnotes | - blackfriday.HeadingIDs | - blackfriday.AutoHeadingIDs | - // Not included in modules/markup/markdown/markdown.go; - // required here to process inline links - blackfriday.Autolink + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" ) -// StripMarkdown parses markdown content by removing all markup and code blocks -// in order to extract links and other references -func StripMarkdown(rawBytes []byte) (string, []string) { - buf, links := StripMarkdownBytes(rawBytes) - return string(buf), links +type stripRenderer struct { + links []string + empty bool } -// StripMarkdownBytes parses markdown content by removing all markup and code blocks -// in order to extract links and other references -func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { - stripper := &MarkdownStripper{ - links: make([]string, 0, 10), - empty: true, - } - - parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions)) - ast := parser.Parse(rawBytes) - var buf bytes.Buffer - stripper.RenderHeader(&buf, ast) - ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - return stripper.RenderNode(&buf, node, entering) +func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { + return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + switch v := n.(type) { + case *ast.Text: + if !v.IsRaw() { + _, prevSibIsText := n.PreviousSibling().(*ast.Text) + coalesce := prevSibIsText + r.processString( + w, + v.Text(source), + coalesce) + if v.SoftLineBreak() { + r.doubleSpace(w) + } + } + return ast.WalkContinue, nil + case *ast.Link: + r.processLink(w, v.Destination) + return ast.WalkSkipChildren, nil + case *ast.AutoLink: + r.processLink(w, v.URL(source)) + return ast.WalkSkipChildren, nil + } + return ast.WalkContinue, nil }) - stripper.RenderFooter(&buf, ast) - return buf.Bytes(), stripper.GetLinks() -} - -// RenderNode is the main rendering method. It will be called once for -// every leaf node and twice for every non-leaf node (first with -// entering=true, then with entering=false). The method should write its -// rendition of the node to the supplied writer w. -func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if !entering { - return blackfriday.GoToNext - } - switch node.Type { - case blackfriday.Text: - r.processString(w, node.Literal, node.Parent == nil) - return blackfriday.GoToNext - case blackfriday.Link: - r.processLink(w, node.LinkData.Destination) - r.coallesce = false - return blackfriday.SkipChildren - } - r.coallesce = false - return blackfriday.GoToNext -} - -// RenderHeader is a method that allows the renderer to produce some -// content preceding the main body of the output document. -func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) { } -// RenderFooter is a symmetric counterpart of RenderHeader. -func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) { -} - -func (r *MarkdownStripper) doubleSpace(w io.Writer) { +func (r *stripRenderer) doubleSpace(w io.Writer) { if !r.empty { _, _ = w.Write([]byte{'\n'}) } } -func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) { +func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { // Always break-up words - if !coallesce || !r.coallesce { + if !coalesce { r.doubleSpace(w) } _, _ = w.Write(text) - r.coallesce = coallesce r.empty = false } -func (r *MarkdownStripper) processLink(w io.Writer, link []byte) { +func (r *stripRenderer) processLink(w io.Writer, link []byte) { // Links are processed out of band r.links = append(r.links, string(link)) - r.coallesce = false } // GetLinks returns the list of link data collected while parsing -func (r *MarkdownStripper) GetLinks() []string { +func (r *stripRenderer) GetLinks() []string { return r.links } + +// AddOptions adds given option to this renderer. +func (r *stripRenderer) AddOptions(...renderer.Option) { + // no-op +} + +// StripMarkdown parses markdown content by removing all markup and code blocks +// in order to extract links and other references +func StripMarkdown(rawBytes []byte) (string, []string) { + buf, links := StripMarkdownBytes(rawBytes) + return string(buf), links +} + +var stripParser parser.Parser +var once = sync.Once{} + +// StripMarkdownBytes parses markdown content by removing all markup and code blocks +// in order to extract links and other references +func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { + once.Do(func() { + gdMarkdown := goldmark.New( + goldmark.WithExtensions(extension.Table, + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + common.Linkify, + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + ), + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + ) + stripParser = gdMarkdown.Parser() + }) + stripper := &stripRenderer{ + links: make([]string, 0, 10), + empty: true, + } + reader := text.NewReader(rawBytes) + doc := stripParser.Parse(reader) + var buf bytes.Buffer + if err := stripper.Render(&buf, rawBytes, doc); err != nil { + log.Error("Unable to strip: %v", err) + } + return buf.Bytes(), stripper.GetLinks() +} diff --git a/modules/markup/mdstripper/mdstripper_test.go b/modules/markup/mdstripper/mdstripper_test.go index 157fe1975b..9efcc35949 100644 --- a/modules/markup/mdstripper/mdstripper_test.go +++ b/modules/markup/mdstripper/mdstripper_test.go @@ -53,6 +53,20 @@ A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE. []string{ "link", }}, + { + "Simply closes: #29 yes", + []string{ + "Simply closes: #29 yes", + }, + []string{}, + }, + { + "Simply closes: !29 yes", + []string{ + "Simply closes: !29 yes", + }, + []string{}, + }, } for _, test := range list { diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index f7789a9e56..d135d41966 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -6,6 +6,8 @@ package markup import ( + "bytes" + "io" "regexp" "sync" @@ -67,6 +69,12 @@ func Sanitize(s string) string { return sanitizer.policy.Sanitize(s) } +// SanitizeReader sanitizes a Reader +func SanitizeReader(r io.Reader) *bytes.Buffer { + NewSanitizer() + return sanitizer.policy.SanitizeReader(r) +} + // SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist. func SanitizeBytes(b []byte) []byte { if len(b) == 0 { diff --git a/modules/references/references_test.go b/modules/references/references_test.go index b38ee102ba..bcb2c4384f 100644 --- a/modules/references/references_test.go +++ b/modules/references/references_test.go @@ -44,10 +44,6 @@ func TestFindAllIssueReferences(t *testing.T) { }, }, { - "#123 no, this is a title.", - []testResult{}, - }, - { " #124 yes, this is a reference.", []testResult{ {124, "", "", "124", false, XRefActionNone, &RefSpan{Start: 0, End: 4}, nil}, |