@@ -96,7 +96,7 @@ require ( | |||
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 | |||
github.com/urfave/cli v1.20.0 | |||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 | |||
github.com/yuin/goldmark v1.1.19 | |||
github.com/yuin/goldmark v1.1.23 | |||
go.etcd.io/bbolt v1.3.3 // indirect | |||
golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d | |||
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa |
@@ -574,8 +574,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q | |||
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= | |||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 h1:HsIQ6yAjfjQ3IxPGrTusxp6Qxn92gNVq2x5CbvQvx3w= | |||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53/go.mod h1:f6elajwZV+xceiaqgRL090YzLEDGSbqr3poGL3ZgXYo= | |||
github.com/yuin/goldmark v1.1.19 h1:0s2/60x0XsFCXHeFut+F3azDVAAyIMyUfJRbRexiTYs= | |||
github.com/yuin/goldmark v1.1.19/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= | |||
github.com/yuin/goldmark v1.1.23 h1:eTodJ8hwEUvwXhb9qxQNuL/q1d+xMQClrXR4mdvV7gs= | |||
github.com/yuin/goldmark v1.1.23/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= | |||
github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs= | |||
github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= | |||
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= |
@@ -6,48 +6,48 @@ goldmark | |||
[![https://coveralls.io/github/yuin/goldmark](https://coveralls.io/repos/github/yuin/goldmark/badge.svg?branch=master)](https://coveralls.io/github/yuin/goldmark) | |||
[![https://goreportcard.com/report/github.com/yuin/goldmark](https://goreportcard.com/badge/github.com/yuin/goldmark)](https://goreportcard.com/report/github.com/yuin/goldmark) | |||
> A Markdown parser written in Go. Easy to extend, standard compliant, well structured. | |||
> A Markdown parser written in Go. Easy to extend, standards-compliant, well-structured. | |||
goldmark is compliant with CommonMark 0.29. | |||
Motivation | |||
---------------------- | |||
I need a Markdown parser for Go that meets following conditions: | |||
I needed a Markdown parser for Go that satisfies the following requirements: | |||
- Easy to extend. | |||
- Markdown is poor in document expressions compared with other light markup languages like reStructuredText. | |||
- Markdown is poor in document expressions compared to other light markup languages such as reStructuredText. | |||
- We have extensions to the Markdown syntax, e.g. PHP Markdown Extra, GitHub Flavored Markdown. | |||
- Standard compliant. | |||
- Standards-compliant. | |||
- Markdown has many dialects. | |||
- GitHub Flavored Markdown is widely used and it is based on CommonMark aside from whether CommonMark is good specification or not. | |||
- CommonMark is too complicated and hard to implement. | |||
- Well structured. | |||
- AST based, and preserves source position of nodes. | |||
- GitHub-Flavored Markdown is widely used and is based upon CommonMark, effectively mooting the question of whether or not CommonMark is an ideal specification. | |||
- CommonMark is complicated and hard to implement. | |||
- Well-structured. | |||
- AST-based; preserves source position of nodes. | |||
- Written in pure Go. | |||
[golang-commonmark](https://gitlab.com/golang-commonmark/markdown) may be a good choice, but it seems to be a copy of [markdown-it](https://github.com/markdown-it). | |||
[blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely used implementation, but it is not CommonMark compliant and cannot be extended from outside of the package since its AST uses structs instead of interfaces. | |||
[blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely used implementation, but is not CommonMark-compliant and cannot be extended from outside of the package, since its AST uses structs instead of interfaces. | |||
Furthermore, its behavior differs from other implementations in some cases, especially regarding lists: ([Deep nested lists don't output correctly #329](https://github.com/russross/blackfriday/issues/329), [List block cannot have a second line #244](https://github.com/russross/blackfriday/issues/244), etc). | |||
Furthermore, its behavior differs from other implementations in some cases, especially regarding lists: [Deep nested lists don't output correctly #329](https://github.com/russross/blackfriday/issues/329), [List block cannot have a second line #244](https://github.com/russross/blackfriday/issues/244), etc. | |||
This behavior sometimes causes problems. If you migrate your Markdown text to blackfriday-based wikis from GitHub, many lists will immediately be broken. | |||
This behavior sometimes causes problems. If you migrate your Markdown text from GitHub to blackfriday-based wikis, many lists will immediately be broken. | |||
As mentioned above, CommonMark is too complicated and hard to implement, so Markdown parsers based on CommonMark barely exist. | |||
As mentioned above, CommonMark is complicated and hard to implement, so Markdown parsers based on CommonMark are few and far between. | |||
Features | |||
---------------------- | |||
- **Standard compliant.** goldmark gets full compliance with the latest CommonMark spec. | |||
- **Standards-compliant.** goldmark is fully compliant with the latest [CommonMark](https://commonmark.org/) specification. | |||
- **Extensible.** Do you want to add a `@username` mention syntax to Markdown? | |||
You can easily do it in goldmark. You can add your AST nodes, | |||
parsers for block level elements, parsers for inline level elements, | |||
transformers for paragraphs, transformers for whole AST structure, and | |||
You can easily do so in goldmark. You can add your AST nodes, | |||
parsers for block-level elements, parsers for inline-level elements, | |||
transformers for paragraphs, transformers for the whole AST structure, and | |||
renderers. | |||
- **Performance.** goldmark performs pretty much equally to cmark, | |||
- **Performance.** goldmark's performance is on par with that of cmark, | |||
the CommonMark reference implementation written in C. | |||
- **Robust.** goldmark is tested with [go-fuzz](https://github.com/dvyukov/go-fuzz), a fuzz testing tool. | |||
- **Builtin extensions.** goldmark ships with common extensions like tables, strikethrough, | |||
- **Built-in extensions.** goldmark ships with common extensions like tables, strikethrough, | |||
task lists, and definition lists. | |||
- **Depends only on standard libraries.** | |||
@@ -62,15 +62,15 @@ Usage | |||
---------------------- | |||
Import packages: | |||
``` | |||
```go | |||
import ( | |||
"bytes" | |||
"github.com/yuin/goldmark" | |||
"bytes" | |||
"github.com/yuin/goldmark" | |||
) | |||
``` | |||
Convert Markdown documents with the CommonMark compliant mode: | |||
Convert Markdown documents with the CommonMark-compliant mode: | |||
```go | |||
var buf bytes.Buffer | |||
@@ -105,11 +105,11 @@ Custom parser and renderer | |||
-------------------------- | |||
```go | |||
import ( | |||
"bytes" | |||
"github.com/yuin/goldmark" | |||
"github.com/yuin/goldmark/extension" | |||
"github.com/yuin/goldmark/parser" | |||
"github.com/yuin/goldmark/renderer/html" | |||
"bytes" | |||
"github.com/yuin/goldmark" | |||
"github.com/yuin/goldmark/extension" | |||
"github.com/yuin/goldmark/parser" | |||
"github.com/yuin/goldmark/renderer/html" | |||
) | |||
md := goldmark.New( | |||
@@ -128,6 +128,14 @@ if err := md.Convert(source, &buf); err != nil { | |||
} | |||
``` | |||
| Functional option | Type | Description | | |||
| ----------------- | ---- | ----------- | | |||
| `goldmark.WithParser` | `parser.Parser` | This option must be passed before `goldmark.WithParserOptions` and `goldmark.WithExtensions` | | |||
| `goldmark.WithRenderer` | `renderer.Renderer` | This option must be passed before `goldmark.WithRendererOptions` and `goldmark.WithExtensions` | | |||
| `goldmark.WithParserOptions` | `...parser.Option` | | | |||
| `goldmark.WithRendererOptions` | `...renderer.Option` | | | |||
| `goldmark.WithExtensions` | `...goldmark.Extender` | | | |||
Parser and Renderer options | |||
------------------------------ | |||
@@ -147,33 +155,33 @@ Parser and Renderer options | |||
| Functional option | Type | Description | | |||
| ----------------- | ---- | ----------- | | |||
| `html.WithWriter` | `html.Writer` | `html.Writer` for writing contents to an `io.Writer`. | | |||
| `html.WithHardWraps` | `-` | Render new lines as `<br>`.| | |||
| `html.WithHardWraps` | `-` | Render newlines as `<br>`.| | |||
| `html.WithXHTML` | `-` | Render as XHTML. | | |||
| `html.WithUnsafe` | `-` | By default, goldmark does not render raw HTML and potentially dangerous links. With this option, goldmark renders these contents as written. | | |||
| `html.WithUnsafe` | `-` | By default, goldmark does not render raw HTML or potentially dangerous links. With this option, goldmark renders such content as written. | | |||
### Built-in extensions | |||
- `extension.Table` | |||
- [GitHub Flavored Markdown: Tables](https://github.github.com/gfm/#tables-extension-) | |||
- [GitHub Flavored Markdown: Tables](https://github.github.com/gfm/#tables-extension-) | |||
- `extension.Strikethrough` | |||
- [GitHub Flavored Markdown: Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) | |||
- [GitHub Flavored Markdown: Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) | |||
- `extension.Linkify` | |||
- [GitHub Flavored Markdown: Autolinks](https://github.github.com/gfm/#autolinks-extension-) | |||
- [GitHub Flavored Markdown: Autolinks](https://github.github.com/gfm/#autolinks-extension-) | |||
- `extension.TaskList` | |||
- [GitHub Flavored Markdown: Task list items](https://github.github.com/gfm/#task-list-items-extension-) | |||
- [GitHub Flavored Markdown: Task list items](https://github.github.com/gfm/#task-list-items-extension-) | |||
- `extension.GFM` | |||
- This extension enables Table, Strikethrough, Linkify and TaskList. | |||
- This extension does not filter tags defined in [6.11: Disallowed Raw HTML (extension)](https://github.github.com/gfm/#disallowed-raw-html-extension-). | |||
If you need to filter HTML tags, see [Security](#security) | |||
- This extension enables Table, Strikethrough, Linkify and TaskList. | |||
- This extension does not filter tags defined in [6.11: Disallowed Raw HTML (extension)](https://github.github.com/gfm/#disallowed-raw-html-extension-). | |||
If you need to filter HTML tags, see [Security](#security). | |||
- `extension.DefinitionList` | |||
- [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list) | |||
- [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list) | |||
- `extension.Footnote` | |||
- [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes) | |||
- [PHP Markdown Extra: Footnotes](https://michelf.ca/projects/php-markdown/extra/#footnotes) | |||
- `extension.Typographer` | |||
- This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/). | |||
- This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/). | |||
### Attributes | |||
`parser.WithAttribute` option allows you to define attributes on some elements. | |||
The `parser.WithAttribute` option allows you to define attributes on some elements. | |||
Currently only headings support attributes. | |||
@@ -197,7 +205,7 @@ heading {#id .className attrName=attrValue} | |||
### Typographer extension | |||
Typographer extension translates plain ASCII punctuation characters into typographic punctuation HTML entities. | |||
The Typographer extension translates plain ASCII punctuation characters into typographic-punctuation HTML entities. | |||
Default substitutions are: | |||
@@ -211,25 +219,65 @@ Default substitutions are: | |||
| `<<` | `«` | | |||
| `>>` | `»` | | |||
You can overwrite the substitutions by `extensions.WithTypographicSubstitutions`. | |||
You can override the default substitutions via `extensions.WithTypographicSubstitutions`: | |||
```go | |||
markdown := goldmark.New( | |||
goldmark.WithExtensions( | |||
extension.NewTypographer( | |||
extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ | |||
extension.LeftSingleQuote: []byte("‚"), | |||
extension.RightSingleQuote: nil, // nil disables a substitution | |||
}), | |||
), | |||
), | |||
goldmark.WithExtensions( | |||
extension.NewTypographer( | |||
extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ | |||
extension.LeftSingleQuote: []byte("‚"), | |||
extension.RightSingleQuote: nil, // nil disables a substitution | |||
}), | |||
), | |||
), | |||
) | |||
``` | |||
### Linkify extension | |||
The Linkify extension implements [Autolinks(extension)](https://github.github.com/gfm/#autolinks-extension-), as | |||
defined in [GitHub Flavored Markdown Spec](https://github.github.com/gfm/). | |||
Since the spec does not define details about URLs, there are numerous ambiguous cases. | |||
You can override autolinking patterns via options. | |||
| Functional option | Type | Description | | |||
| ----------------- | ---- | ----------- | | |||
| `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` | | |||
| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs, including protocols | | |||
| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URL starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) | | |||
| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses | | |||
Example, using [xurls](https://github.com/mvdan/xurls): | |||
```go | |||
import "mvdan.cc/xurls/v2" | |||
markdown := goldmark.New( | |||
goldmark.WithRendererOptions( | |||
html.WithXHTML(), | |||
html.WithUnsafe(), | |||
), | |||
goldmark.WithExtensions( | |||
extension.NewLinkify( | |||
extension.WithLinkifyAllowedProtocols([][]byte{ | |||
[]byte("http:"), | |||
[]byte("https:"), | |||
}), | |||
extension.WithLinkifyURLRegexp( | |||
xurls.Strict(), | |||
), | |||
), | |||
), | |||
) | |||
``` | |||
Security | |||
-------------------- | |||
By default, goldmark does not render raw HTML and potentially dangerous URLs. | |||
If you need to gain more control over untrusted contents, it is recommended to | |||
By default, goldmark does not render raw HTML or potentially dangerous URLs. | |||
If you need to gain more control over untrusted contents, it is recommended that you | |||
use an HTML sanitizer such as [bluemonday](https://github.com/microcosm-cc/bluemonday). | |||
Benchmark | |||
@@ -238,11 +286,10 @@ You can run this benchmark in the `_benchmark` directory. | |||
### against other golang libraries | |||
blackfriday v2 seems to be fastest, but it is not CommonMark compliant, so the performance of | |||
blackfriday v2 cannot simply be compared with that of the other CommonMark compliant libraries. | |||
blackfriday v2 seems to be the fastest, but as it is not CommonMark compliant, its performance cannot be directly compared to that of the CommonMark-compliant libraries. | |||
Though goldmark builds clean extensible AST structure and get full compliance with | |||
CommonMark, it is reasonably fast and has lower memory consumption. | |||
goldmark, meanwhile, builds a clean, extensible AST structure, achieves full compliance with | |||
CommonMark, and consumes less memory, all while being reasonably fast. | |||
``` | |||
goos: darwin | |||
@@ -268,21 +315,21 @@ iteration: 50 | |||
average: 0.0040964230 sec | |||
``` | |||
As you can see, goldmark performs pretty much equally to cmark. | |||
As you can see, goldmark's performance is on par with cmark's. | |||
Extensions | |||
-------------------- | |||
- [goldmark-meta](https://github.com/yuin/goldmark-meta): A YAML metadata | |||
extension for the goldmark Markdown parser. | |||
- [goldmark-highlighting](https://github.com/yuin/goldmark-highlighting): A Syntax highlighting extension | |||
- [goldmark-highlighting](https://github.com/yuin/goldmark-highlighting): A syntax-highlighting extension | |||
for the goldmark markdown parser. | |||
- [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for goldmark markdown parser | |||
- [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for the goldmark markdown parser | |||
goldmark internal(for extension developers) | |||
---------------------------------------------- | |||
### Overview | |||
goldmark's Markdown processing is outlined as a bellow diagram. | |||
goldmark's Markdown processing is outlined in the diagram below. | |||
``` | |||
<Markdown in []byte, parser.Context> | |||
@@ -313,10 +360,11 @@ goldmark's Markdown processing is outlined as a bellow diagram. | |||
### Parsing | |||
Markdown documents are read through `text.Reader` interface. | |||
AST nodes do not have concrete text. AST nodes have segment information of the documents. It is represented by `text.Segment` . | |||
AST nodes do not have concrete text. AST nodes have segment information of the documents, represented by `text.Segment` . | |||
`text.Segment` has 3 attributes: `Start`, `End`, `Padding` . | |||
(TBC) | |||
**TODO** | |||
@@ -236,10 +236,12 @@ func (n *BaseNode) RemoveChild(self, v Node) { | |||
// RemoveChildren implements Node.RemoveChildren . | |||
func (n *BaseNode) RemoveChildren(self Node) { | |||
for c := n.firstChild; c != nil; c = c.NextSibling() { | |||
for c := n.firstChild; c != nil; { | |||
c.SetParent(nil) | |||
c.SetPreviousSibling(nil) | |||
next := c.NextSibling() | |||
c.SetNextSibling(nil) | |||
c = next | |||
} | |||
n.firstChild = nil | |||
n.lastChild = nil | |||
@@ -466,20 +468,25 @@ type Walker func(n Node, entering bool) (WalkStatus, error) | |||
// Walk walks an AST tree by depth-first search. | |||
func Walk(n Node, walker Walker) error { | |||
_, err := walkHelper(n, walker) | |||
return err | |||
} | |||
func walkHelper(n Node, walker Walker) (WalkStatus, error) { | |||
status, err := walker(n, true) | |||
if err != nil || status == WalkStop { | |||
return err | |||
return status, err | |||
} | |||
if status != WalkSkipChildren { | |||
for c := n.FirstChild(); c != nil; c = c.NextSibling() { | |||
if err = Walk(c, walker); err != nil { | |||
return err | |||
if st, err := walkHelper(c, walker); err != nil || st == WalkStop { | |||
return WalkStop, err | |||
} | |||
} | |||
} | |||
status, err = walker(n, false) | |||
if err != nil || status == WalkStop { | |||
return err | |||
return WalkStop, err | |||
} | |||
return nil | |||
return WalkContinue, nil | |||
} |
@@ -303,11 +303,11 @@ func NewBlockquote() *Blockquote { | |||
} | |||
} | |||
// A List structr represents a list of Markdown text. | |||
// A List struct represents a list of Markdown text. | |||
type List struct { | |||
BaseBlock | |||
// Marker is a markar character like '-', '+', ')' and '.'. | |||
// Marker is a marker character like '-', '+', ')' and '.'. | |||
Marker byte | |||
// IsTight is a true if this list is a 'tight' list. | |||
@@ -364,7 +364,7 @@ func NewList(marker byte) *List { | |||
type ListItem struct { | |||
BaseBlock | |||
// Offset is an offset potision of this item. | |||
// Offset is an offset position of this item. | |||
Offset int | |||
} | |||
@@ -170,7 +170,7 @@ func NewText() *Text { | |||
} | |||
} | |||
// NewTextSegment returns a new Text node with the given source potision. | |||
// NewTextSegment returns a new Text node with the given source position. | |||
func NewTextSegment(v textm.Segment) *Text { | |||
return &Text{ | |||
BaseInline: BaseInline{}, | |||
@@ -467,7 +467,7 @@ type AutoLink struct { | |||
// Inline implements Inline.Inline. | |||
func (n *AutoLink) Inline() {} | |||
// Dump implenets Node.Dump | |||
// Dump implements Node.Dump | |||
func (n *AutoLink) Dump(source []byte, level int) { | |||
segment := n.value.Segment | |||
m := map[string]string{ |
@@ -11,7 +11,7 @@ type TaskCheckBox struct { | |||
IsChecked bool | |||
} | |||
// Dump impelemtns Node.Dump. | |||
// Dump implements Node.Dump. | |||
func (n *TaskCheckBox) Dump(source []byte, level int) { | |||
m := map[string]string{ | |||
"Checked": fmt.Sprintf("%v", n.IsChecked), |
@@ -2,27 +2,153 @@ package extension | |||
import ( | |||
"bytes" | |||
"regexp" | |||
"github.com/yuin/goldmark" | |||
"github.com/yuin/goldmark/ast" | |||
"github.com/yuin/goldmark/parser" | |||
"github.com/yuin/goldmark/text" | |||
"github.com/yuin/goldmark/util" | |||
"regexp" | |||
) | |||
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) | |||
type linkifyParser struct { | |||
// A LinkifyConfig struct is a data structure that holds configuration of the | |||
// Linkify extension. | |||
type LinkifyConfig struct { | |||
AllowedProtocols [][]byte | |||
URLRegexp *regexp.Regexp | |||
WWWRegexp *regexp.Regexp | |||
EmailRegexp *regexp.Regexp | |||
} | |||
const optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols" | |||
const optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp" | |||
const optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp" | |||
const optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp" | |||
// SetOption implements SetOptioner. | |||
func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) { | |||
switch name { | |||
case optLinkifyAllowedProtocols: | |||
c.AllowedProtocols = value.([][]byte) | |||
case optLinkifyURLRegexp: | |||
c.URLRegexp = value.(*regexp.Regexp) | |||
case optLinkifyWWWRegexp: | |||
c.WWWRegexp = value.(*regexp.Regexp) | |||
case optLinkifyEmailRegexp: | |||
c.EmailRegexp = value.(*regexp.Regexp) | |||
} | |||
} | |||
// A LinkifyOption interface sets options for the Linkify extension. | |||
type LinkifyOption interface { | |||
parser.Option | |||
SetLinkifyOption(*LinkifyConfig) | |||
} | |||
type withLinkifyAllowedProtocols struct { | |||
value [][]byte | |||
} | |||
func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) { | |||
c.Options[optLinkifyAllowedProtocols] = o.value | |||
} | |||
func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) { | |||
p.AllowedProtocols = o.value | |||
} | |||
// WithLinkifyAllowedProtocols is a functional option that specifies allowed | |||
// protocols in autolinks. Each protocol must end with ':' like | |||
// 'http:' . | |||
func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption { | |||
return &withLinkifyAllowedProtocols{ | |||
value: value, | |||
} | |||
} | |||
type withLinkifyURLRegexp struct { | |||
value *regexp.Regexp | |||
} | |||
func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) { | |||
c.Options[optLinkifyURLRegexp] = o.value | |||
} | |||
func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) { | |||
p.URLRegexp = o.value | |||
} | |||
// WithLinkifyURLRegexp is a functional option that specifies | |||
// a pattern of the URL including a protocol. | |||
func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption { | |||
return &withLinkifyURLRegexp{ | |||
value: value, | |||
} | |||
} | |||
// WithLinkifyWWWRegexp is a functional option that specifies | |||
// a pattern of the URL without a protocol. | |||
// This pattern must start with 'www.' . | |||
type withLinkifyWWWRegexp struct { | |||
value *regexp.Regexp | |||
} | |||
var defaultLinkifyParser = &linkifyParser{} | |||
func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) { | |||
c.Options[optLinkifyWWWRegexp] = o.value | |||
} | |||
func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) { | |||
p.WWWRegexp = o.value | |||
} | |||
func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption { | |||
return &withLinkifyWWWRegexp{ | |||
value: value, | |||
} | |||
} | |||
// WithLinkifyEmailRegexp is a functional option that specifies | |||
// a pattern of the email address. | |||
type withLinkifyEmailRegexp struct { | |||
value *regexp.Regexp | |||
} | |||
func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) { | |||
c.Options[optLinkifyEmailRegexp] = o.value | |||
} | |||
func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) { | |||
p.EmailRegexp = o.value | |||
} | |||
func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption { | |||
return &withLinkifyEmailRegexp{ | |||
value: value, | |||
} | |||
} | |||
type linkifyParser struct { | |||
LinkifyConfig | |||
} | |||
// NewLinkifyParser returns a new InlineParser that can parse | |||
// text that seems like a URL. | |||
func NewLinkifyParser() parser.InlineParser { | |||
return defaultLinkifyParser | |||
func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser { | |||
p := &linkifyParser{ | |||
LinkifyConfig: LinkifyConfig{ | |||
AllowedProtocols: nil, | |||
URLRegexp: urlRegexp, | |||
WWWRegexp: wwwURLRegxp, | |||
}, | |||
} | |||
for _, o := range opts { | |||
o.SetLinkifyOption(&p.LinkifyConfig) | |||
} | |||
return p | |||
} | |||
func (s *linkifyParser) Trigger() []byte { | |||
@@ -53,14 +179,26 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont | |||
var m []int | |||
var protocol []byte | |||
var typ ast.AutoLinkType = ast.AutoLinkURL | |||
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { | |||
m = urlRegexp.FindSubmatchIndex(line) | |||
if s.LinkifyConfig.AllowedProtocols == nil { | |||
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { | |||
m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) | |||
} | |||
} else { | |||
for _, prefix := range s.LinkifyConfig.AllowedProtocols { | |||
if bytes.HasPrefix(line, prefix) { | |||
m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line) | |||
break | |||
} | |||
} | |||
} | |||
if m == nil && bytes.HasPrefix(line, domainWWW) { | |||
m = wwwURLRegxp.FindSubmatchIndex(line) | |||
m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line) | |||
protocol = []byte("http") | |||
} | |||
if m != nil { | |||
if m != nil && m[0] != 0 { | |||
m = nil | |||
} | |||
if m != nil && m[0] == 0 { | |||
lastChar := line[m[1]-1] | |||
if lastChar == '.' { | |||
m[1]-- | |||
@@ -96,7 +234,15 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont | |||
return nil | |||
} | |||
typ = ast.AutoLinkEmail | |||
stop := util.FindEmailIndex(line) | |||
stop := -1 | |||
if s.LinkifyConfig.EmailRegexp == nil { | |||
stop = util.FindEmailIndex(line) | |||
} else { | |||
m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line) | |||
if m != nil && m[0] == 0 { | |||
stop = m[1] | |||
} | |||
} | |||
if stop < 0 { | |||
return nil | |||
} | |||
@@ -136,15 +282,22 @@ func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) { | |||
} | |||
type linkify struct { | |||
options []LinkifyOption | |||
} | |||
// Linkify is an extension that allows you to parse text that seems like a URL. | |||
var Linkify = &linkify{} | |||
func NewLinkify(opts ...LinkifyOption) goldmark.Extender { | |||
return &linkify{ | |||
options: opts, | |||
} | |||
} | |||
func (e *linkify) Extend(m goldmark.Markdown) { | |||
m.Parser().AddOptions( | |||
parser.WithInlineParsers( | |||
util.Prioritized(NewLinkifyParser(), 999), | |||
util.Prioritized(NewLinkifyParser(e.options...), 999), | |||
), | |||
) | |||
} |
@@ -27,7 +27,7 @@ type tableParagraphTransformer struct { | |||
var defaultTableParagraphTransformer = &tableParagraphTransformer{} | |||
// NewTableParagraphTransformer returns a new ParagraphTransformer | |||
// that can transform pargraphs into tables. | |||
// that can transform paragraphs into tables. | |||
func NewTableParagraphTransformer() parser.ParagraphTransformer { | |||
return defaultTableParagraphTransformer | |||
} |
@@ -1,6 +1,8 @@ | |||
package extension | |||
import ( | |||
"unicode" | |||
"github.com/yuin/goldmark" | |||
gast "github.com/yuin/goldmark/ast" | |||
"github.com/yuin/goldmark/parser" | |||
@@ -31,6 +33,8 @@ const ( | |||
LeftAngleQuote | |||
// RightAngleQuote is >> | |||
RightAngleQuote | |||
// Apostrophe is ' | |||
Apostrophe | |||
typographicPunctuationMax | |||
) | |||
@@ -52,6 +56,7 @@ func newDefaultSubstitutions() [][]byte { | |||
replacements[Ellipsis] = []byte("…") | |||
replacements[LeftAngleQuote] = []byte("«") | |||
replacements[RightAngleQuote] = []byte("»") | |||
replacements[Apostrophe] = []byte("’") | |||
return replacements | |||
} | |||
@@ -189,6 +194,26 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser | |||
return nil | |||
} | |||
if c == '\'' { | |||
if s.Substitutions[Apostrophe] != nil { | |||
// Handle decade abbreviations such as '90s | |||
if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' { | |||
after := util.ToRune(line, 4) | |||
if len(line) == 3 || unicode.IsSpace(after) || unicode.IsPunct(after) { | |||
node := gast.NewString(s.Substitutions[Apostrophe]) | |||
node.SetCode(true) | |||
block.Advance(1) | |||
return node | |||
} | |||
} | |||
// Convert normal apostrophes. This is probably more flexible than necessary but | |||
// converts any apostrophe in between two alphanumerics. | |||
if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (util.IsAlphaNumeric(line[1])) { | |||
node := gast.NewString(s.Substitutions[Apostrophe]) | |||
node.SetCode(true) | |||
block.Advance(1) | |||
return node | |||
} | |||
} | |||
if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose { | |||
node := gast.NewString(s.Substitutions[LeftSingleQuote]) | |||
node.SetCode(true) | |||
@@ -228,10 +253,10 @@ type typographer struct { | |||
options []TypographerOption | |||
} | |||
// Typographer is an extension that repalace punctuations with typographic entities. | |||
// Typographer is an extension that replaces punctuations with typographic entities. | |||
var Typographer = &typographer{} | |||
// NewTypographer returns a new Entender that repalace punctuations with typographic entities. | |||
// NewTypographer returns a new Extender that replaces punctuations with typographic entities. | |||
func NewTypographer(opts ...TypographerOption) goldmark.Extender { | |||
return &typographer{ | |||
options: opts, |
@@ -11,7 +11,7 @@ import ( | |||
) | |||
// A DelimiterProcessor interface provides a set of functions about | |||
// Deliiter nodes. | |||
// Delimiter nodes. | |||
type DelimiterProcessor interface { | |||
// IsDelimiter returns true if given character is a delimiter, otherwise false. | |||
IsDelimiter(byte) bool | |||
@@ -38,7 +38,7 @@ type Delimiter struct { | |||
// See https://spec.commonmark.org/0.29/#can-open-emphasis for details. | |||
CanClose bool | |||
// Length is a remaining length of this delmiter. | |||
// Length is a remaining length of this delimiter. | |||
Length int | |||
// OriginalLength is a original length of this delimiter. |
@@ -147,11 +147,6 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N | |||
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) | |||
return nil | |||
} | |||
labelValue := block.Value(text.NewSegment(last.Segment.Start+1, segment.Start)) | |||
if util.IsBlank(labelValue) && !last.IsImage { | |||
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) | |||
return nil | |||
} | |||
c := block.Peek() | |||
l, pos := block.Position() | |||
@@ -351,14 +346,31 @@ func parseLinkTitle(block text.Reader) ([]byte, bool) { | |||
if opener == '(' { | |||
closer = ')' | |||
} | |||
line, _ := block.PeekLine() | |||
pos := util.FindClosure(line[1:], opener, closer, false, true) | |||
if pos < 0 { | |||
return nil, false | |||
savedLine, savedPosition := block.Position() | |||
var title []byte | |||
for i := 0; ; i++ { | |||
line, _ := block.PeekLine() | |||
if line == nil { | |||
block.SetPosition(savedLine, savedPosition) | |||
return nil, false | |||
} | |||
offset := 0 | |||
if i == 0 { | |||
offset = 1 | |||
} | |||
pos := util.FindClosure(line[offset:], opener, closer, false, true) | |||
if pos < 0 { | |||
title = append(title, line[offset:]...) | |||
block.AdvanceLine() | |||
continue | |||
} | |||
pos += offset + 1 // 1: closer | |||
block.Advance(pos) | |||
if i == 0 { // avoid allocating new slice | |||
return line[offset : pos-1], true | |||
} | |||
return append(title, line[offset:pos-1]...), true | |||
} | |||
pos += 2 // opener + closer | |||
block.Advance(pos) | |||
return line[1 : pos-1], true | |||
} | |||
func (s *linkParser) CloseBlock(parent ast.Node, block text.Reader, pc Context) { |
@@ -459,7 +459,7 @@ type Parser interface { | |||
// Parse parses the given Markdown text into AST nodes. | |||
Parse(reader text.Reader, opts ...ParseOption) ast.Node | |||
// AddOption adds the given option to thie parser. | |||
// AddOption adds the given option to this parser. | |||
AddOptions(...Option) | |||
} | |||
@@ -505,7 +505,7 @@ type BlockParser interface { | |||
// Close will be called when the parser returns Close. | |||
Close(node ast.Node, reader text.Reader, pc Context) | |||
// CanInterruptParagraph returns true if the parser can interrupt pargraphs, | |||
// CanInterruptParagraph returns true if the parser can interrupt paragraphs, | |||
// otherwise false. | |||
CanInterruptParagraph() bool | |||
@@ -660,13 +660,13 @@ func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) | |||
} | |||
} | |||
// A Writer interface wirtes textual contents to a writer. | |||
// A Writer interface writes textual contents to a writer. | |||
type Writer interface { | |||
// Write writes the given source to writer with resolving references and unescaping | |||
// backslash escaped characters. | |||
Write(writer util.BufWriter, source []byte) | |||
// RawWrite wirtes the given source to writer without resolving references and | |||
// RawWrite writes the given source to writer without resolving references and | |||
// unescaping backslash escaped characters. | |||
RawWrite(writer util.BufWriter, source []byte) | |||
} |
@@ -7,7 +7,7 @@ import ( | |||
var space = []byte(" ") | |||
// A Segment struct holds information about source potisions. | |||
// A Segment struct holds information about source positions. | |||
type Segment struct { | |||
// Start is a start position of the segment. | |||
Start int | |||
@@ -197,7 +197,7 @@ func (s *Segments) Sliced(lo, hi int) []Segment { | |||
return s.values[lo:hi] | |||
} | |||
// Clear delete all element of the collction. | |||
// Clear delete all element of the collection. | |||
func (s *Segments) Clear() { | |||
s.values = nil | |||
} |
@@ -8,7 +8,6 @@ import ( | |||
"regexp" | |||
"sort" | |||
"strconv" | |||
"strings" | |||
"unicode/utf8" | |||
) | |||
@@ -55,7 +54,7 @@ func (b *CopyOnWriteBuffer) IsCopied() bool { | |||
return b.copied | |||
} | |||
// IsEscapedPunctuation returns true if caracter at a given index i | |||
// IsEscapedPunctuation returns true if character at a given index i | |||
// is an escaped punctuation, otherwise false. | |||
func IsEscapedPunctuation(source []byte, i int) bool { | |||
return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1]) | |||
@@ -229,7 +228,7 @@ func IndentWidth(bs []byte, currentPos int) (width, pos int) { | |||
return | |||
} | |||
// FirstNonSpacePosition returns a potisoin line that is a first nonspace | |||
// FirstNonSpacePosition returns a position line that is a first nonspace | |||
// character. | |||
func FirstNonSpacePosition(bs []byte) int { | |||
i := 0 | |||
@@ -387,6 +386,52 @@ func TrimRightSpace(source []byte) []byte { | |||
return TrimRight(source, spaces) | |||
} | |||
// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes. | |||
func DoFullUnicodeCaseFolding(v []byte) []byte { | |||
var rbuf []byte | |||
cob := NewCopyOnWriteBuffer(v) | |||
n := 0 | |||
for i := 0; i < len(v); i++ { | |||
c := v[i] | |||
if c < 0xb5 { | |||
if c >= 0x41 && c <= 0x5a { | |||
// A-Z to a-z | |||
cob.Write(v[n:i]) | |||
cob.WriteByte(c + 32) | |||
n = i + 1 | |||
} | |||
continue | |||
} | |||
if !utf8.RuneStart(c) { | |||
continue | |||
} | |||
r, length := utf8.DecodeRune(v[i:]) | |||
if r == utf8.RuneError { | |||
continue | |||
} | |||
folded, ok := unicodeCaseFoldings[r] | |||
if !ok { | |||
continue | |||
} | |||
cob.Write(v[n:i]) | |||
if rbuf == nil { | |||
rbuf = make([]byte, 4) | |||
} | |||
for _, f := range folded { | |||
l := utf8.EncodeRune(rbuf, f) | |||
cob.Write(rbuf[:l]) | |||
} | |||
i += length - 1 | |||
n = i + 1 | |||
} | |||
if cob.IsCopied() { | |||
cob.Write(v[n:]) | |||
} | |||
return cob.Bytes() | |||
} | |||
// ReplaceSpaces replaces sequence of spaces with the given repl. | |||
func ReplaceSpaces(source []byte, repl byte) []byte { | |||
var ret []byte | |||
@@ -439,13 +484,14 @@ func ToValidRune(v rune) rune { | |||
return v | |||
} | |||
// ToLinkReference convert given bytes into a valid link reference string. | |||
// ToLinkReference trims leading and trailing spaces and convert into lower | |||
// ToLinkReference converts given bytes into a valid link reference string. | |||
// ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower | |||
// case and replace spaces with a single space character. | |||
func ToLinkReference(v []byte) string { | |||
v = TrimLeftSpace(v) | |||
v = TrimRightSpace(v) | |||
return strings.ToLower(string(ReplaceSpaces(v, ' '))) | |||
v = DoFullUnicodeCaseFolding(v) | |||
return string(ReplaceSpaces(v, ' ')) | |||
} | |||
var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil} | |||
@@ -589,7 +635,7 @@ var htmlSpace = []byte("%20") | |||
// 2. resolve numeric references | |||
// 3. resolve entity references | |||
// | |||
// URL encoded values (%xx) are keeped as is. | |||
// URL encoded values (%xx) are kept as is. | |||
func URLEscape(v []byte, resolveReference bool) []byte { | |||
if resolveReference { | |||
v = UnescapePunctuations(v) |
@@ -450,7 +450,7 @@ github.com/willf/bitset | |||
github.com/xanzy/ssh-agent | |||
# github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 | |||
github.com/yohcop/openid-go | |||
# github.com/yuin/goldmark v1.1.19 | |||
# github.com/yuin/goldmark v1.1.23 | |||
github.com/yuin/goldmark | |||
github.com/yuin/goldmark/ast | |||
github.com/yuin/goldmark/extension |