|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357 |
- package org
-
- import (
- "fmt"
- "path"
- "regexp"
- "strings"
- "time"
- "unicode"
- )
-
// Inline nodes that do not contain other nodes.
type (
	// Text is a run of literal characters. IsRaw is true for text produced by
	// parseRawInline and false for text produced by parseInline.
	Text struct {
		Content string
		IsRaw   bool
	}

	// LineBreak stands for Count consecutive newline characters.
	LineBreak struct{ Count int }

	// ExplicitLineBreak is a `\\` that is followed only by whitespace up to
	// and including the end of the line.
	ExplicitLineBreak struct{}

	// StatisticToken is a `[n/m]` or `[n%]` cookie; Content holds the part
	// between the brackets.
	StatisticToken struct{ Content string }

	// Timestamp is a `<2006-01-02 Mon 15:04 +1w>` style timestamp.
	// IsDate is true when no time of day was given. Interval holds the
	// repeater (e.g. "+1w") without surrounding spaces, or "" when absent.
	Timestamp struct {
		Time     time.Time
		IsDate   bool
		Interval string
	}
)
-
- type Emphasis struct {
- Kind string
- Content []Node
- }
-
- type LatexFragment struct {
- OpeningPair string
- ClosingPair string
- Content []Node
- }
-
- type FootnoteLink struct {
- Name string
- Definition *FootnoteDefinition
- }
-
- type RegularLink struct {
- Protocol string
- Description []Node
- URL string
- AutoLink bool
- }
-
// Link detection.
var (
	// validURLCharacters lists every byte that may be part of an auto-detected URL.
	validURLCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="
	// autolinkProtocols matches the protocols that are turned into links automatically.
	autolinkProtocols = regexp.MustCompile(`^(https?|ftp|file)$`)
	// imageExtensionRegexp and videoExtensionRegexp classify a link by its file extension.
	imageExtensionRegexp = regexp.MustCompile(`^[.](png|gif|jpe?g|svg|tiff?)$`)
	videoExtensionRegexp = regexp.MustCompile(`^[.](webm|mp4)$`)
)

// Patterns for inline elements; all of them are anchored at the current position.
var (
	subScriptSuperScriptRegexp = regexp.MustCompile(`^([_^]){([^{}]+?)}`)
	timestampRegexp            = regexp.MustCompile(`^<(\d{4}-\d{2}-\d{2})( [A-Za-z]+)?( \d{2}:\d{2})?( \+\d+[dwmy])?>`)
	footnoteRegexp             = regexp.MustCompile(`^\[fn:([\w-]*?)(:(.*?))?\]`)
	statisticsTokenRegexp      = regexp.MustCompile(`^\[(\d+/\d+|\d+%)\]`)
	latexFragmentRegexp        = regexp.MustCompile(`(?s)^\\begin{(\w+)}(.*)\\end{(\w+)}`)
)

// Layouts (in the notation of the time package) for timestamps with and
// without a time of day.
var (
	timestampFormat = "2006-01-02 Mon 15:04"
	datestampFormat = "2006-01-02 Mon"
)

// latexFragmentPairs maps the opening delimiter of a LaTeX fragment to its
// closing delimiter.
var latexFragmentPairs = map[string]string{
	`\(`: `\)`,
	`\[`: `\]`,
	`$$`: `$$`,
}
-
- func (d *Document) parseInline(input string) (nodes []Node) {
- previous, current := 0, 0
- for current < len(input) {
- rewind, consumed, node := 0, 0, (Node)(nil)
- switch input[current] {
- case '^':
- consumed, node = d.parseSubOrSuperScript(input, current)
- case '_':
- consumed, node = d.parseSubScriptOrEmphasis(input, current)
- case '*', '/', '+':
- consumed, node = d.parseEmphasis(input, current, false)
- case '=', '~':
- consumed, node = d.parseEmphasis(input, current, true)
- case '[':
- consumed, node = d.parseOpeningBracket(input, current)
- case '<':
- consumed, node = d.parseTimestamp(input, current)
- case '\\':
- consumed, node = d.parseExplicitLineBreakOrLatexFragment(input, current)
- case '$':
- consumed, node = d.parseLatexFragment(input, current)
- case '\n':
- consumed, node = d.parseLineBreak(input, current)
- case ':':
- rewind, consumed, node = d.parseAutoLink(input, current)
- current -= rewind
- }
- if consumed != 0 {
- if current > previous {
- nodes = append(nodes, Text{input[previous:current], false})
- }
- if node != nil {
- nodes = append(nodes, node)
- }
- current += consumed
- previous = current
- } else {
- current++
- }
- }
-
- if previous < len(input) {
- nodes = append(nodes, Text{input[previous:], false})
- }
- return nodes
- }
-
- func (d *Document) parseRawInline(input string) (nodes []Node) {
- previous, current := 0, 0
- for current < len(input) {
- if input[current] == '\n' {
- consumed, node := d.parseLineBreak(input, current)
- if current > previous {
- nodes = append(nodes, Text{input[previous:current], true})
- }
- nodes = append(nodes, node)
- current += consumed
- previous = current
- } else {
- current++
- }
- }
- if previous < len(input) {
- nodes = append(nodes, Text{input[previous:], true})
- }
- return nodes
- }
-
- func (d *Document) parseLineBreak(input string, start int) (int, Node) {
- i := start
- for ; i < len(input) && input[i] == '\n'; i++ {
- }
- return i - start, LineBreak{i - start}
- }
-
- func (d *Document) parseExplicitLineBreakOrLatexFragment(input string, start int) (int, Node) {
- switch {
- case start+2 >= len(input):
- case input[start+1] == '\\' && start != 0 && input[start-1] != '\n':
- for i := start + 2; i <= len(input)-1 && unicode.IsSpace(rune(input[i])); i++ {
- if input[i] == '\n' {
- return i + 1 - start, ExplicitLineBreak{}
- }
- }
- case input[start+1] == '(' || input[start+1] == '[':
- return d.parseLatexFragment(input, start)
- case strings.Index(input[start:], `\begin{`) == 0:
- if m := latexFragmentRegexp.FindStringSubmatch(input[start:]); m != nil {
- if open, content, close := m[1], m[2], m[3]; open == close {
- openingPair, closingPair := `\begin{`+open+`}`, `\end{`+close+`}`
- i := strings.Index(input[start:], closingPair)
- return i + len(closingPair), LatexFragment{openingPair, closingPair, d.parseRawInline(content)}
- }
- }
- }
- return 0, nil
- }
-
- func (d *Document) parseLatexFragment(input string, start int) (int, Node) {
- if start+2 >= len(input) {
- return 0, nil
- }
- openingPair := input[start : start+2]
- closingPair := latexFragmentPairs[openingPair]
- if i := strings.Index(input[start+2:], closingPair); i != -1 {
- content := d.parseRawInline(input[start+2 : start+2+i])
- return i + 2 + 2, LatexFragment{openingPair, closingPair, content}
- }
- return 0, nil
- }
-
- func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
- if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
- return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}}
- }
- return 0, nil
- }
-
- func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
- if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
- return consumed, node
- }
- return d.parseEmphasis(input, start, false)
- }
-
- func (d *Document) parseOpeningBracket(input string, start int) (int, Node) {
- if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' {
- return d.parseRegularLink(input, start)
- } else if footnoteRegexp.MatchString(input[start:]) {
- return d.parseFootnoteReference(input, start)
- } else if statisticsTokenRegexp.MatchString(input[start:]) {
- return d.parseStatisticToken(input, start)
- }
- return 0, nil
- }
-
- func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
- if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil {
- name, definition := m[1], m[3]
- if name == "" && definition == "" {
- return 0, nil
- }
- link := FootnoteLink{name, nil}
- if definition != "" {
- link.Definition = &FootnoteDefinition{name, []Node{Paragraph{d.parseInline(definition)}}, true}
- }
- return len(m[0]), link
- }
- return 0, nil
- }
-
- func (d *Document) parseStatisticToken(input string, start int) (int, Node) {
- if m := statisticsTokenRegexp.FindStringSubmatch(input[start:]); m != nil {
- return len(m[1]) + 2, StatisticToken{m[1]}
- }
- return 0, nil
- }
-
- func (d *Document) parseAutoLink(input string, start int) (int, int, Node) {
- if !d.AutoLink || start == 0 || len(input[start:]) < 3 || input[start:start+3] != "://" {
- return 0, 0, nil
- }
- protocolStart, protocol := start-1, ""
- for ; protocolStart > 0; protocolStart-- {
- if !unicode.IsLetter(rune(input[protocolStart])) {
- protocolStart++
- break
- }
- }
- if m := autolinkProtocols.FindStringSubmatch(input[protocolStart:start]); m != nil {
- protocol = m[1]
- } else {
- return 0, 0, nil
- }
- end := start
- for ; end < len(input) && strings.ContainsRune(validURLCharacters, rune(input[end])); end++ {
- }
- path := input[start:end]
- if path == "://" {
- return 0, 0, nil
- }
- return len(protocol), len(path + protocol), RegularLink{protocol, nil, protocol + path, true}
- }
-
- func (d *Document) parseRegularLink(input string, start int) (int, Node) {
- input = input[start:]
- if len(input) < 3 || input[:2] != "[[" || input[2] == '[' {
- return 0, nil
- }
- end := strings.Index(input, "]]")
- if end == -1 {
- return 0, nil
- }
- rawLinkParts := strings.Split(input[2:end], "][")
- description, link := ([]Node)(nil), rawLinkParts[0]
- if len(rawLinkParts) == 2 {
- link, description = rawLinkParts[0], d.parseInline(rawLinkParts[1])
- }
- if strings.ContainsRune(link, '\n') {
- return 0, nil
- }
- consumed := end + 2
- protocol, linkParts := "", strings.SplitN(link, ":", 2)
- if len(linkParts) == 2 {
- protocol = linkParts[0]
- }
- return consumed, RegularLink{protocol, description, link, false}
- }
-
- func (d *Document) parseTimestamp(input string, start int) (int, Node) {
- if m := timestampRegexp.FindStringSubmatch(input[start:]); m != nil {
- ddmmyy, hhmm, interval, isDate := m[1], m[3], strings.TrimSpace(m[4]), false
- if hhmm == "" {
- hhmm, isDate = "00:00", true
- }
- t, err := time.Parse(timestampFormat, fmt.Sprintf("%s Mon %s", ddmmyy, hhmm))
- if err != nil {
- return 0, nil
- }
- timestamp := Timestamp{t, isDate, interval}
- return len(m[0]), timestamp
- }
- return 0, nil
- }
-
- func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) {
- marker, i := input[start], start
- if !hasValidPreAndBorderChars(input, i) {
- return 0, nil
- }
- for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
- if input[i] == '\n' {
- consumedNewLines++
- }
-
- if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
- if isRaw {
- return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])}
- }
- return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
- }
- }
- return 0, nil
- }
-
// The character classes below follow org-emphasis-regexp-components (an
// emacs elisp variable).

// hasValidPreAndBorderChars reports whether the marker at input[i] may open
// an emphasis: the character after it (if any) must be a valid border
// character and the character before it (if any) a valid pre character.
func hasValidPreAndBorderChars(input string, i int) bool {
	if i+1 < len(input) && !isValidBorderChar(rune(input[i+1])) {
		return false
	}
	return i == 0 || isValidPreChar(rune(input[i-1]))
}

// hasValidPostAndBorderChars reports whether the marker at input[i] may
// close an emphasis: the character before it (if any) must be a valid border
// character and the character after it (if any) a valid post character.
func hasValidPostAndBorderChars(input string, i int) bool {
	if i != 0 && !isValidBorderChar(rune(input[i-1])) {
		return false
	}
	return i+1 >= len(input) || isValidPostChar(rune(input[i+1]))
}

// isValidPreChar reports whether r may directly precede an opening marker.
func isValidPreChar(r rune) bool {
	switch r {
	case '-', '(', '{', '\'', '"':
		return true
	}
	return unicode.IsSpace(r)
}

// isValidPostChar reports whether r may directly follow a closing marker.
func isValidPostChar(r rune) bool {
	switch r {
	case '-', '.', ',', ':', '!', '?', ';', '\'', '"', ')', '}', '[':
		return true
	}
	return unicode.IsSpace(r)
}

// isValidBorderChar reports whether r may be the first or last character of
// the emphasized text, i.e. whether it is not whitespace.
func isValidBorderChar(r rune) bool {
	return !unicode.IsSpace(r)
}
-
- func (l RegularLink) Kind() string {
- if p := l.Protocol; l.Description != nil || (p != "" && p != "file" && p != "http" && p != "https") {
- return "regular"
- }
- if imageExtensionRegexp.MatchString(path.Ext(l.URL)) {
- return "image"
- }
- if videoExtensionRegexp.MatchString(path.Ext(l.URL)) {
- return "video"
- }
- return "regular"
- }
-
- func (n Text) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n LineBreak) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n ExplicitLineBreak) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n StatisticToken) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n Emphasis) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n LatexFragment) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n FootnoteLink) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n RegularLink) String() string { return orgWriter.WriteNodesAsString(n) }
- func (n Timestamp) String() string { return orgWriter.WriteNodesAsString(n) }
|