summary refs log tree commit diff stats
path: root/modules/markup/mdstripper
diff options
context:
space:
mode:
author guillep2k <18600385+guillep2k@users.noreply.github.com> 2019-10-13 19:29:10 -0300
committer zeripath <art27@cantab.net> 2019-10-13 23:29:10 +0100
commit 15809d81f7d36759f289b941352a9754611c5dba (patch)
tree f9362e535fb67aa59859b535ec6c58ccf5a139bf /modules/markup/mdstripper
parent 6e3f51098b29cd5c61d62732a42a7554cbc8cc2f (diff)
download gitea-15809d81f7d36759f289b941352a9754611c5dba.tar.gz
gitea-15809d81f7d36759f289b941352a9754611c5dba.zip
Rewrite reference processing code in preparation for opening/closing from comment references (#8261)
* Add a markdown stripper for mentions and xrefs
* Improve comments
* Small code simplification
* Move reference code to modules/references
* Fix typo
* Make MarkdownStripper return [][]byte
* Implement preliminary keywords parsing
* Add FIXME comment
* Fix comment
* make fmt
* Fix permissions check
* Fix text assumptions
* Fix imports
* Fix lint, fmt
* Fix unused import
* Add missing export comment
* Bypass revive on implemented interface
* Move mdstripper into its own package
* Support alphanumeric patterns
* Refactor FindAllMentions
* Move mentions test to references
* Parse mentions from reference package
* Refactor code to implement renderizable references
* Fix typo
* Move patterns and tests to the references package
* Fix nil reference
* Preliminary rendering attempt of closing keywords
* Normalize names, comments, general tidy-up
* Add CSS style for action keywords
* Fix permission for admin and owner
* Fix golangci-lint
* Fix golangci-lint
Diffstat (limited to 'modules/markup/mdstripper')
-rw-r--r-- modules/markup/mdstripper/mdstripper.go 260
-rw-r--r-- modules/markup/mdstripper/mdstripper_test.go 71
2 files changed, 331 insertions, 0 deletions
diff --git a/modules/markup/mdstripper/mdstripper.go b/modules/markup/mdstripper/mdstripper.go
new file mode 100644
index 0000000000..7a901b17a9
--- /dev/null
+++ b/modules/markup/mdstripper/mdstripper.go
@@ -0,0 +1,260 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package mdstripper
+
+import (
+ "bytes"
+
+ "github.com/russross/blackfriday"
+)
+
+// MarkdownStripper is a blackfriday.Renderer that writes only the text content of a document and collects its link targets
+type MarkdownStripper struct {
+ blackfriday.Renderer
+ links []string // link targets appended by processLink, in the order they are encountered
+ coallesce bool // true only right after NormalText wrote; lets the next NormalText continue without a separator
+}
+
+const (
+ blackfridayExtensions = 0 | // parser extensions handed to blackfriday.Markdown by StripMarkdown and StripMarkdownBytes
+ blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
+ blackfriday.EXTENSION_TABLES |
+ blackfriday.EXTENSION_FENCED_CODE |
+ blackfriday.EXTENSION_STRIKETHROUGH |
+ blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK |
+ blackfriday.EXTENSION_DEFINITION_LISTS |
+ blackfriday.EXTENSION_FOOTNOTES |
+ blackfriday.EXTENSION_HEADER_IDS |
+ blackfriday.EXTENSION_AUTO_HEADER_IDS |
+ // EXTENSION_AUTOLINK is not part of the set in modules/markup/markdown/markdown.go;
+ // it is required here to process inline links
+ blackfriday.EXTENSION_AUTOLINK
+)
+
+//revive:disable:var-naming Implementing the Rendering interface requires breaking some linting rules
+
+// StripMarkdown runs the markdown in rawBytes through a MarkdownStripper and
+// returns the text that remains, as a string, together with the link targets
+// collected along the way. Code blocks, code spans, images and raw HTML
+// contribute no text. It returns the same data as StripMarkdownBytes, with
+// the body converted to a string.
+func StripMarkdown(rawBytes []byte) (string, []string) {
+	body, links := StripMarkdownBytes(rawBytes)
+	return string(body), links
+}
+
+// StripMarkdownBytes runs the markdown in rawBytes through a MarkdownStripper
+// and returns the text that remains, as bytes, together with the link targets
+// collected along the way.
+func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
+	stripper := new(MarkdownStripper)
+	stripper.links = make([]string, 0, 10)
+	stripped := blackfriday.Markdown(rawBytes, stripper, blackfridayExtensions)
+	return stripped, stripper.GetLinks()
+}
+
+// block-level callbacks
+
+// BlockCode drops code blocks: nothing is written to out and r.coallesce is cleared
+func (r *MarkdownStripper) BlockCode(out *bytes.Buffer, text []byte, infoString string) {
+ // text is discarded; clearing the flag keeps later text from being glued to earlier text
+ r.coallesce = false
+}
+
+// BlockQuote writes the quote's text to out as a chunk of its own, never coalesced with neighbouring text
+func (r *MarkdownStripper) BlockQuote(out *bytes.Buffer, text []byte) {
+ // FIXME: perhaps it's better to leave out block quote for this?
+ r.processString(out, text, false)
+}
+
+// BlockHtml drops raw HTML blocks: nothing is written to out and r.coallesce is cleared
+func (r *MarkdownStripper) BlockHtml(out *bytes.Buffer, text []byte) { //nolint
+ // text is discarded; clearing the flag keeps later text from being glued to earlier text
+ r.coallesce = false
+}
+
+// Header invokes the text callback (its result is ignored) and then clears r.coallesce; level and id are unused
+func (r *MarkdownStripper) Header(out *bytes.Buffer, text func() bool, level int, id string) {
+ text()
+ r.coallesce = false
+}
+
+// HRule writes nothing for a horizontal rule and clears r.coallesce
+func (r *MarkdownStripper) HRule(out *bytes.Buffer) {
+ // Nothing to emit; clearing the flag keeps later text from being glued to earlier text
+ r.coallesce = false
+}
+
+// List invokes the text callback (its result is ignored) and then clears r.coallesce; flags is unused
+func (r *MarkdownStripper) List(out *bytes.Buffer, text func() bool, flags int) {
+ text()
+ r.coallesce = false
+}
+
+// ListItem writes the item's text to out as a chunk of its own; flags is unused
+func (r *MarkdownStripper) ListItem(out *bytes.Buffer, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// Paragraph invokes the text callback (its result is ignored) and then clears r.coallesce
+func (r *MarkdownStripper) Paragraph(out *bytes.Buffer, text func() bool) {
+ text()
+ r.coallesce = false
+}
+
+// Table writes the header text and then the body text to out, each as a chunk of its own; columnData is unused
+func (r *MarkdownStripper) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
+ r.processString(out, header, false)
+ r.processString(out, body, false)
+}
+
+// TableRow writes the row's text to out as a chunk of its own
+func (r *MarkdownStripper) TableRow(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// TableHeaderCell writes the header cell's text to out as a chunk of its own; flags is unused
+func (r *MarkdownStripper) TableHeaderCell(out *bytes.Buffer, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// TableCell writes the cell's text to out as a chunk of its own; flags is unused
+func (r *MarkdownStripper) TableCell(out *bytes.Buffer, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// Footnotes invokes the text callback (its result is ignored); unlike Header, List and Paragraph it leaves r.coallesce untouched
+func (r *MarkdownStripper) Footnotes(out *bytes.Buffer, text func() bool) {
+ text()
+}
+
+// FootnoteItem writes the footnote's text to out as a chunk of its own; name and flags are unused
+func (r *MarkdownStripper) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// TitleBlock writes the title block's text to out as a chunk of its own
+func (r *MarkdownStripper) TitleBlock(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// Span-level callbacks
+
+// AutoLink records link through processLink with empty content; nothing is written to out and kind is unused
+func (r *MarkdownStripper) AutoLink(out *bytes.Buffer, link []byte, kind int) {
+ r.processLink(out, link, []byte{})
+}
+
+// CodeSpan drops inline code: nothing is written to out and r.coallesce is cleared
+func (r *MarkdownStripper) CodeSpan(out *bytes.Buffer, text []byte) {
+ // text is discarded; clearing the flag separates the text before the span from the text after it
+ r.coallesce = false
+}
+
+// DoubleEmphasis writes the emphasized text to out as a chunk of its own, separated from the text around it
+func (r *MarkdownStripper) DoubleEmphasis(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// Emphasis writes the emphasized text to out as a chunk of its own, separated from the text around it
+func (r *MarkdownStripper) Emphasis(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// Image drops images: nothing is written to out, link is not recorded, and r.coallesce is cleared
+func (r *MarkdownStripper) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
+ // link, title and alt are all discarded
+ r.coallesce = false
+}
+
+// LineBreak writes nothing itself and clears r.coallesce
+func (r *MarkdownStripper) LineBreak(out *bytes.Buffer) {
+ // Clearing the flag makes the next text chunk ask for a separator instead of being glued on
+ r.coallesce = false
+}
+
+// Link records link through processLink; title is unused and content is handed over but, like link, never written to out
+func (r *MarkdownStripper) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
+ r.processLink(out, link, content)
+}
+
+// RawHtmlTag drops inline HTML tags: nothing is written to out and r.coallesce is cleared
+func (r *MarkdownStripper) RawHtmlTag(out *bytes.Buffer, tag []byte) { //nolint
+ // tag is discarded; clearing the flag separates the text before the tag from the text after it
+ r.coallesce = false
+}
+
+// TripleEmphasis writes the emphasized text to out as a chunk of its own, separated from the text around it
+func (r *MarkdownStripper) TripleEmphasis(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// StrikeThrough writes the struck-through text to out as a chunk of its own, separated from the text around it
+func (r *MarkdownStripper) StrikeThrough(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// FootnoteRef drops footnote references: nothing is written to out and r.coallesce is cleared
+func (r *MarkdownStripper) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
+ // ref and id are discarded
+ r.coallesce = false
+}
+
+// Low-level callbacks
+
+// Entity drops HTML entities: nothing is written to out and r.coallesce is cleared
+func (r *MarkdownStripper) Entity(out *bytes.Buffer, entity []byte) {
+ // FIXME: literal entities are not parsed; perhaps they should
+ r.coallesce = false
+}
+
+// NormalText writes plain text to out; it is appended without a separator only when the previous write was also NormalText
+func (r *MarkdownStripper) NormalText(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, true)
+}
+
+// Header and footer
+
+// DocumentHeader writes nothing to out and clears r.coallesce
+func (r *MarkdownStripper) DocumentHeader(out *bytes.Buffer) {
+ r.coallesce = false
+}
+
+// DocumentFooter writes nothing to out and clears r.coallesce
+func (r *MarkdownStripper) DocumentFooter(out *bytes.Buffer) {
+ r.coallesce = false
+}
+
+// GetFlags always returns 0, i.e. no rendering flags are set
+func (r *MarkdownStripper) GetFlags() int {
+ return 0
+}
+
+//revive:enable:var-naming
+
+// doubleSpace appends a single newline to out as a chunk separator. It does
+// nothing while out is still empty, so no separator precedes the first chunk.
+func doubleSpace(out *bytes.Buffer) {
+	if out.Len() == 0 {
+		return
+	}
+	out.WriteByte('\n')
+}
+
+// processString appends text to out and remembers coallesce for the next
+// call. A separator is requested through doubleSpace first, unless both this
+// call and the previous write asked to coalesce.
+func (r *MarkdownStripper) processString(out *bytes.Buffer, text []byte, coallesce bool) {
+	if joinWithPrevious := coallesce && r.coallesce; !joinWithPrevious {
+		doubleSpace(out)
+	}
+	out.Write(text)
+	r.coallesce = coallesce
+}
+
+// processLink stores the link target so that GetLinks can return it later.
+// Nothing is written to out and content is ignored; r.coallesce is cleared.
+func (r *MarkdownStripper) processLink(out *bytes.Buffer, link []byte, content []byte) {
+	r.coallesce = false
+	target := string(link)
+	r.links = append(r.links, target)
+}
+
+// GetLinks returns the link targets collected while parsing, in the order they were recorded; the slice is not copied
+func (r *MarkdownStripper) GetLinks() []string {
+ return r.links
+}
diff --git a/modules/markup/mdstripper/mdstripper_test.go b/modules/markup/mdstripper/mdstripper_test.go
new file mode 100644
index 0000000000..157fe1975b
--- /dev/null
+++ b/modules/markup/mdstripper/mdstripper_test.go
@@ -0,0 +1,71 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package mdstripper
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestMarkdownStripper feeds a markdown sample through StripMarkdown and
+// checks both the surviving text (compared line by line, blank lines and
+// surrounding whitespace ignored) and the collected link targets.
+func TestMarkdownStripper(t *testing.T) {
+	type testItem struct {
+		markdown      string
+		expectedText  []string
+		expectedLinks []string
+	}
+
+	list := []testItem{
+		{
+			markdown: `
+## This is a title
+
+This is [one](link) to paradise.
+This **is emphasized**.
+This: should coallesce.
+
+` + "```" + `
+This is a code block.
+This should not appear in the output at all.
+` + "```" + `
+
+* Bullet 1
+* Bullet 2
+
+A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE.
+ `,
+			expectedText: []string{
+				"This is a title",
+				"This is",
+				"to paradise.",
+				"This",
+				"is emphasized",
+				".",
+				"This: should coallesce.",
+				"Bullet 1",
+				"Bullet 2",
+				"A HIDDEN",
+				"IN THIS LINE.",
+			},
+			expectedLinks: []string{
+				"link",
+			},
+		},
+	}
+
+	for _, item := range list {
+		stripped, gotLinks := StripMarkdown([]byte(item.markdown))
+
+		// Reduce the output to its non-blank lines, trimmed of surrounding whitespace.
+		rawLines := strings.Split(stripped, "\n")
+		gotText := make([]string, 0, len(rawLines))
+		for _, raw := range rawLines {
+			trimmed := strings.TrimSpace(raw)
+			if trimmed == "" {
+				continue
+			}
+			gotText = append(gotText, trimmed)
+		}
+
+		assert.EqualValues(t, item.expectedText, gotText)
+		assert.EqualValues(t, item.expectedLinks, gotLinks)
+	}
+}