diff options
Diffstat (limited to 'vendor/github.com/yuin/goldmark/parser/html_block.go')
-rw-r--r-- | vendor/github.com/yuin/goldmark/parser/html_block.go | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/vendor/github.com/yuin/goldmark/parser/html_block.go b/vendor/github.com/yuin/goldmark/parser/html_block.go new file mode 100644 index 0000000000..95b6918cdc --- /dev/null +++ b/vendor/github.com/yuin/goldmark/parser/html_block.go @@ -0,0 +1,228 @@ +package parser + +import ( + "bytes" + "regexp" + "strings" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var allowedBlockTags = map[string]bool{ + "address": true, + "article": true, + "aside": true, + "base": true, + "basefont": true, + "blockquote": true, + "body": true, + "caption": true, + "center": true, + "col": true, + "colgroup": true, + "dd": true, + "details": true, + "dialog": true, + "dir": true, + "div": true, + "dl": true, + "dt": true, + "fieldset": true, + "figcaption": true, + "figure": true, + "footer": true, + "form": true, + "frame": true, + "frameset": true, + "h1": true, + "h2": true, + "h3": true, + "h4": true, + "h5": true, + "h6": true, + "head": true, + "header": true, + "hr": true, + "html": true, + "iframe": true, + "legend": true, + "li": true, + "link": true, + "main": true, + "menu": true, + "menuitem": true, + "meta": true, + "nav": true, + "noframes": true, + "ol": true, + "optgroup": true, + "option": true, + "p": true, + "param": true, + "section": true, + "source": true, + "summary": true, + "table": true, + "tbody": true, + "td": true, + "tfoot": true, + "th": true, + "thead": true, + "title": true, + "tr": true, + "track": true, + "ul": true, +} + +var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`) +var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style)>.*`) + +var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`) +var htmlBlockType2Close = []byte{'-', '-', '>'} + +var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`) +var htmlBlockType3Close = []byte{'?', '>'} + +var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`) +var htmlBlockType4Close = []byte{'>'} + +var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`) +var htmlBlockType5Close = []byte{']', ']', '>'} + +var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.*|>.*|/>.*|)\n?$`) + +var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `*)(:?>|/>)\s*\n?$`) + +type htmlBlockParser struct { +} + +var defaultHTMLBlockParser = &htmlBlockParser{} + +// NewHTMLBlockParser return a new BlockParser that can parse html +// blocks. +func NewHTMLBlockParser() BlockParser { + return defaultHTMLBlockParser +} + +func (b *htmlBlockParser) Trigger() []byte { + return []byte{'<'} +} + +func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) { + var node *ast.HTMLBlock + line, segment := reader.PeekLine() + last := pc.LastOpenedBlock().Node + if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' { + return nil, NoChildren + } + + if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil { + node = ast.NewHTMLBlock(ast.HTMLBlockType1) + } else if htmlBlockType2OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType2) + } else if htmlBlockType3OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType3) + } else if htmlBlockType4OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType4) + } else if htmlBlockType5OpenRegexp.Match(line) { + node = ast.NewHTMLBlock(ast.HTMLBlockType5) + } else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil { + isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/")) + hasAttr := match[6] != match[7] + tagName := strings.ToLower(string(line[match[4]:match[5]])) + _, ok := allowedBlockTags[tagName] + if ok { + node = ast.NewHTMLBlock(ast.HTMLBlockType6) + } else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph + node = ast.NewHTMLBlock(ast.HTMLBlockType7) + } + } + if node == nil { + if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil { + tagName := string(line[match[2]:match[3]]) + _, ok := allowedBlockTags[strings.ToLower(tagName)] + if ok { + node = ast.NewHTMLBlock(ast.HTMLBlockType6) + } + } + } + if node != nil { + reader.Advance(segment.Len() - 1) + node.Lines().Append(segment) + return node, NoChildren + } + return nil, NoChildren +} + +func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State { + htmlBlock := node.(*ast.HTMLBlock) + lines := htmlBlock.Lines() + line, segment := reader.PeekLine() + var closurePattern []byte + + switch htmlBlock.HTMLBlockType { + case ast.HTMLBlockType1: + if lines.Len() == 1 { + firstLine := lines.At(0) + if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) { + return Close + } + } + if htmlBlockType1CloseRegexp.Match(line) { + htmlBlock.ClosureLine = segment + reader.Advance(segment.Len() - 1) + return Close + } + case ast.HTMLBlockType2: + closurePattern = htmlBlockType2Close + fallthrough + case ast.HTMLBlockType3: + if closurePattern == nil { + closurePattern = htmlBlockType3Close + } + fallthrough + case ast.HTMLBlockType4: + if closurePattern == nil { + closurePattern = htmlBlockType4Close + } + fallthrough + case ast.HTMLBlockType5: + if closurePattern == nil { + closurePattern = htmlBlockType5Close + } + + if lines.Len() == 1 { + firstLine := lines.At(0) + if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) { + return Close + } + } + if bytes.Contains(line, closurePattern) { + htmlBlock.ClosureLine = segment + reader.Advance(segment.Len() - 1) + return Close + } + + case ast.HTMLBlockType6, ast.HTMLBlockType7: + if util.IsBlank(line) { + return Close + } + } + node.Lines().Append(segment) + reader.Advance(segment.Len() - 1) + return Continue | NoChildren +} + +func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) { + // nothing to do +} + +func (b *htmlBlockParser) CanInterruptParagraph() bool { + return true +} + +func (b *htmlBlockParser) CanAcceptIndentedLine() bool { + return false +} |