You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

inline.go 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. package org
  import (
  	"fmt"
  	"path"
  	"regexp"
  	"strings"
  	"time"
  	"unicode"
  	"unicode/utf8"
  )
  10. type Text struct {
  11. Content string
  12. IsRaw bool
  13. }
  14. type LineBreak struct{ Count int }
  15. type ExplicitLineBreak struct{}
  16. type StatisticToken struct{ Content string }
  17. type Timestamp struct {
  18. Time time.Time
  19. IsDate bool
  20. Interval string
  21. }
  22. type Emphasis struct {
  23. Kind string
  24. Content []Node
  25. }
  26. type LatexFragment struct {
  27. OpeningPair string
  28. ClosingPair string
  29. Content []Node
  30. }
  31. type FootnoteLink struct {
  32. Name string
  33. Definition *FootnoteDefinition
  34. }
  35. type RegularLink struct {
  36. Protocol string
  37. Description []Node
  38. URL string
  39. AutoLink bool
  40. }
  // Package-level tables and patterns used by the inline parser.
  var (
  	// validURLCharacters lists every byte that may be part of an auto-linked URL.
  	validURLCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="

  	// autolinkProtocols matches the protocols eligible for auto-linking.
  	autolinkProtocols = regexp.MustCompile(`^(https?|ftp|file)$`)

  	// imageExtensionRegexp and videoExtensionRegexp classify a link by the
  	// file extension of its URL.
  	imageExtensionRegexp = regexp.MustCompile(`^[.](png|gif|jpe?g|svg|tiff?)$`)
  	videoExtensionRegexp = regexp.MustCompile(`^[.](webm|mp4)$`)

  	// subScriptSuperScriptRegexp matches `_{...}` and `^{...}`.
  	subScriptSuperScriptRegexp = regexp.MustCompile(`^([_^]){([^{}]+?)}`)

  	// timestampRegexp matches `<date [weekday] [hh:mm] [+Nunit]>`.
  	timestampRegexp = regexp.MustCompile(`^<(\d{4}-\d{2}-\d{2})( [A-Za-z]+)?( \d{2}:\d{2})?( \+\d+[dwmy])?>`)

  	// footnoteRegexp matches `[fn:name]`, `[fn:name:definition]` and `[fn::definition]`.
  	footnoteRegexp = regexp.MustCompile(`^\[fn:([\w-]*?)(:(.*?))?\]`)

  	// statisticsTokenRegexp matches `[n/m]` and `[n%]`.
  	statisticsTokenRegexp = regexp.MustCompile(`^\[(\d+/\d+|\d+%)\]`)

  	// latexFragmentRegexp matches a `\begin{env}...\end{env}` span; `.` also
  	// matches newlines.
  	latexFragmentRegexp = regexp.MustCompile(`(?s)^\\begin{(\w+)}(.*)\\end{(\w+)}`)

  	// Layouts (in time.Parse reference-time notation) for timestamps with
  	// and without a time of day.
  	timestampFormat = "2006-01-02 Mon 15:04"
  	datestampFormat = "2006-01-02 Mon"

  	// latexFragmentPairs maps each two-byte opening delimiter to its closing one.
  	latexFragmentPairs = map[string]string{
  		`\(`: `\)`,
  		`\[`: `\]`,
  		`$$`: `$$`,
  	}
  )
  57. func (d *Document) parseInline(input string) (nodes []Node) {
  58. previous, current := 0, 0
  59. for current < len(input) {
  60. rewind, consumed, node := 0, 0, (Node)(nil)
  61. switch input[current] {
  62. case '^':
  63. consumed, node = d.parseSubOrSuperScript(input, current)
  64. case '_':
  65. consumed, node = d.parseSubScriptOrEmphasis(input, current)
  66. case '*', '/', '+':
  67. consumed, node = d.parseEmphasis(input, current, false)
  68. case '=', '~':
  69. consumed, node = d.parseEmphasis(input, current, true)
  70. case '[':
  71. consumed, node = d.parseOpeningBracket(input, current)
  72. case '<':
  73. consumed, node = d.parseTimestamp(input, current)
  74. case '\\':
  75. consumed, node = d.parseExplicitLineBreakOrLatexFragment(input, current)
  76. case '$':
  77. consumed, node = d.parseLatexFragment(input, current)
  78. case '\n':
  79. consumed, node = d.parseLineBreak(input, current)
  80. case ':':
  81. rewind, consumed, node = d.parseAutoLink(input, current)
  82. current -= rewind
  83. }
  84. if consumed != 0 {
  85. if current > previous {
  86. nodes = append(nodes, Text{input[previous:current], false})
  87. }
  88. if node != nil {
  89. nodes = append(nodes, node)
  90. }
  91. current += consumed
  92. previous = current
  93. } else {
  94. current++
  95. }
  96. }
  97. if previous < len(input) {
  98. nodes = append(nodes, Text{input[previous:], false})
  99. }
  100. return nodes
  101. }
  102. func (d *Document) parseRawInline(input string) (nodes []Node) {
  103. previous, current := 0, 0
  104. for current < len(input) {
  105. if input[current] == '\n' {
  106. consumed, node := d.parseLineBreak(input, current)
  107. if current > previous {
  108. nodes = append(nodes, Text{input[previous:current], true})
  109. }
  110. nodes = append(nodes, node)
  111. current += consumed
  112. previous = current
  113. } else {
  114. current++
  115. }
  116. }
  117. if previous < len(input) {
  118. nodes = append(nodes, Text{input[previous:], true})
  119. }
  120. return nodes
  121. }
  122. func (d *Document) parseLineBreak(input string, start int) (int, Node) {
  123. i := start
  124. for ; i < len(input) && input[i] == '\n'; i++ {
  125. }
  126. return i - start, LineBreak{i - start}
  127. }
  128. func (d *Document) parseExplicitLineBreakOrLatexFragment(input string, start int) (int, Node) {
  129. switch {
  130. case start+2 >= len(input):
  131. case input[start+1] == '\\' && start != 0 && input[start-1] != '\n':
  132. for i := start + 2; i <= len(input)-1 && unicode.IsSpace(rune(input[i])); i++ {
  133. if input[i] == '\n' {
  134. return i + 1 - start, ExplicitLineBreak{}
  135. }
  136. }
  137. case input[start+1] == '(' || input[start+1] == '[':
  138. return d.parseLatexFragment(input, start)
  139. case strings.Index(input[start:], `\begin{`) == 0:
  140. if m := latexFragmentRegexp.FindStringSubmatch(input[start:]); m != nil {
  141. if open, content, close := m[1], m[2], m[3]; open == close {
  142. openingPair, closingPair := `\begin{`+open+`}`, `\end{`+close+`}`
  143. i := strings.Index(input[start:], closingPair)
  144. return i + len(closingPair), LatexFragment{openingPair, closingPair, d.parseRawInline(content)}
  145. }
  146. }
  147. }
  148. return 0, nil
  149. }
  150. func (d *Document) parseLatexFragment(input string, start int) (int, Node) {
  151. if start+2 >= len(input) {
  152. return 0, nil
  153. }
  154. openingPair := input[start : start+2]
  155. closingPair := latexFragmentPairs[openingPair]
  156. if i := strings.Index(input[start+2:], closingPair); i != -1 {
  157. content := d.parseRawInline(input[start+2 : start+2+i])
  158. return i + 2 + 2, LatexFragment{openingPair, closingPair, content}
  159. }
  160. return 0, nil
  161. }
  162. func (d *Document) parseSubOrSuperScript(input string, start int) (int, Node) {
  163. if m := subScriptSuperScriptRegexp.FindStringSubmatch(input[start:]); m != nil {
  164. return len(m[2]) + 3, Emphasis{m[1] + "{}", []Node{Text{m[2], false}}}
  165. }
  166. return 0, nil
  167. }
  168. func (d *Document) parseSubScriptOrEmphasis(input string, start int) (int, Node) {
  169. if consumed, node := d.parseSubOrSuperScript(input, start); consumed != 0 {
  170. return consumed, node
  171. }
  172. return d.parseEmphasis(input, start, false)
  173. }
  174. func (d *Document) parseOpeningBracket(input string, start int) (int, Node) {
  175. if len(input[start:]) >= 2 && input[start] == '[' && input[start+1] == '[' {
  176. return d.parseRegularLink(input, start)
  177. } else if footnoteRegexp.MatchString(input[start:]) {
  178. return d.parseFootnoteReference(input, start)
  179. } else if statisticsTokenRegexp.MatchString(input[start:]) {
  180. return d.parseStatisticToken(input, start)
  181. }
  182. return 0, nil
  183. }
  184. func (d *Document) parseFootnoteReference(input string, start int) (int, Node) {
  185. if m := footnoteRegexp.FindStringSubmatch(input[start:]); m != nil {
  186. name, definition := m[1], m[3]
  187. if name == "" && definition == "" {
  188. return 0, nil
  189. }
  190. link := FootnoteLink{name, nil}
  191. if definition != "" {
  192. link.Definition = &FootnoteDefinition{name, []Node{Paragraph{d.parseInline(definition)}}, true}
  193. }
  194. return len(m[0]), link
  195. }
  196. return 0, nil
  197. }
  198. func (d *Document) parseStatisticToken(input string, start int) (int, Node) {
  199. if m := statisticsTokenRegexp.FindStringSubmatch(input[start:]); m != nil {
  200. return len(m[1]) + 2, StatisticToken{m[1]}
  201. }
  202. return 0, nil
  203. }
  204. func (d *Document) parseAutoLink(input string, start int) (int, int, Node) {
  205. if !d.AutoLink || start == 0 || len(input[start:]) < 3 || input[start:start+3] != "://" {
  206. return 0, 0, nil
  207. }
  208. protocolStart, protocol := start-1, ""
  209. for ; protocolStart > 0; protocolStart-- {
  210. if !unicode.IsLetter(rune(input[protocolStart])) {
  211. protocolStart++
  212. break
  213. }
  214. }
  215. if m := autolinkProtocols.FindStringSubmatch(input[protocolStart:start]); m != nil {
  216. protocol = m[1]
  217. } else {
  218. return 0, 0, nil
  219. }
  220. end := start
  221. for ; end < len(input) && strings.ContainsRune(validURLCharacters, rune(input[end])); end++ {
  222. }
  223. path := input[start:end]
  224. if path == "://" {
  225. return 0, 0, nil
  226. }
  227. return len(protocol), len(path + protocol), RegularLink{protocol, nil, protocol + path, true}
  228. }
  229. func (d *Document) parseRegularLink(input string, start int) (int, Node) {
  230. input = input[start:]
  231. if len(input) < 3 || input[:2] != "[[" || input[2] == '[' {
  232. return 0, nil
  233. }
  234. end := strings.Index(input, "]]")
  235. if end == -1 {
  236. return 0, nil
  237. }
  238. rawLinkParts := strings.Split(input[2:end], "][")
  239. description, link := ([]Node)(nil), rawLinkParts[0]
  240. if len(rawLinkParts) == 2 {
  241. link, description = rawLinkParts[0], d.parseInline(rawLinkParts[1])
  242. }
  243. if strings.ContainsRune(link, '\n') {
  244. return 0, nil
  245. }
  246. consumed := end + 2
  247. protocol, linkParts := "", strings.SplitN(link, ":", 2)
  248. if len(linkParts) == 2 {
  249. protocol = linkParts[0]
  250. }
  251. return consumed, RegularLink{protocol, description, link, false}
  252. }
  253. func (d *Document) parseTimestamp(input string, start int) (int, Node) {
  254. if m := timestampRegexp.FindStringSubmatch(input[start:]); m != nil {
  255. ddmmyy, hhmm, interval, isDate := m[1], m[3], strings.TrimSpace(m[4]), false
  256. if hhmm == "" {
  257. hhmm, isDate = "00:00", true
  258. }
  259. t, err := time.Parse(timestampFormat, fmt.Sprintf("%s Mon %s", ddmmyy, hhmm))
  260. if err != nil {
  261. return 0, nil
  262. }
  263. timestamp := Timestamp{t, isDate, interval}
  264. return len(m[0]), timestamp
  265. }
  266. return 0, nil
  267. }
  268. func (d *Document) parseEmphasis(input string, start int, isRaw bool) (int, Node) {
  269. marker, i := input[start], start
  270. if !hasValidPreAndBorderChars(input, i) {
  271. return 0, nil
  272. }
  273. for i, consumedNewLines := i+1, 0; i < len(input) && consumedNewLines <= d.MaxEmphasisNewLines; i++ {
  274. if input[i] == '\n' {
  275. consumedNewLines++
  276. }
  277. if input[i] == marker && i != start+1 && hasValidPostAndBorderChars(input, i) {
  278. if isRaw {
  279. return i + 1 - start, Emphasis{input[start : start+1], d.parseRawInline(input[start+1 : i])}
  280. }
  281. return i + 1 - start, Emphasis{input[start : start+1], d.parseInline(input[start+1 : i])}
  282. }
  283. }
  284. return 0, nil
  285. }
  // The helpers below implement the character classes of
  // org-emphasis-regexp-components (emacs elisp variable).
  //
  // Neighbouring characters are decoded as whole UTF-8 runes. Looking at single
  // bytes would misclassify multi-byte characters: the continuation bytes 0x85
  // and 0xA0 (e.g. the last byte of "х" or "à") equal the code points of NEL
  // and NBSP, which unicode.IsSpace reports as whitespace.

  // hasValidPreAndBorderChars reports whether the marker at input[i] may open
  // emphasis: the character after it (if any) must be a valid border character
  // and the character before it (if any) a valid pre character.
  func hasValidPreAndBorderChars(input string, i int) bool {
  	if i+1 < len(input) {
  		if border, _ := utf8.DecodeRuneInString(input[i+1:]); !isValidBorderChar(border) {
  			return false
  		}
  	}
  	if i == 0 {
  		return true
  	}
  	pre, _ := utf8.DecodeLastRuneInString(input[:i])
  	return isValidPreChar(pre)
  }

  // hasValidPostAndBorderChars reports whether the marker at input[i] may close
  // emphasis: the character before it (if any) must be a valid border character
  // and the character after it (if any) a valid post character.
  func hasValidPostAndBorderChars(input string, i int) bool {
  	if i > 0 {
  		if border, _ := utf8.DecodeLastRuneInString(input[:i]); !isValidBorderChar(border) {
  			return false
  		}
  	}
  	if i+1 >= len(input) {
  		return true
  	}
  	post, _ := utf8.DecodeRuneInString(input[i+1:])
  	return isValidPostChar(post)
  }

  // isValidPreChar reports whether r may directly precede an opening marker.
  func isValidPreChar(r rune) bool {
  	return unicode.IsSpace(r) || strings.ContainsRune(`-({'"`, r)
  }

  // isValidPostChar reports whether r may directly follow a closing marker.
  func isValidPostChar(r rune) bool {
  	return unicode.IsSpace(r) || strings.ContainsRune(`-.,:!?;'")}[`, r)
  }

  // isValidBorderChar reports whether r may be the first or last character
  // inside emphasis, i.e. whether it is not whitespace.
  func isValidBorderChar(r rune) bool { return !unicode.IsSpace(r) }
  300. func (l RegularLink) Kind() string {
  301. if p := l.Protocol; l.Description != nil || (p != "" && p != "file" && p != "http" && p != "https") {
  302. return "regular"
  303. }
  304. if imageExtensionRegexp.MatchString(path.Ext(l.URL)) {
  305. return "image"
  306. }
  307. if videoExtensionRegexp.MatchString(path.Ext(l.URL)) {
  308. return "video"
  309. }
  310. return "regular"
  311. }
  312. func (n Text) String() string { return orgWriter.WriteNodesAsString(n) }
  313. func (n LineBreak) String() string { return orgWriter.WriteNodesAsString(n) }
  314. func (n ExplicitLineBreak) String() string { return orgWriter.WriteNodesAsString(n) }
  315. func (n StatisticToken) String() string { return orgWriter.WriteNodesAsString(n) }
  316. func (n Emphasis) String() string { return orgWriter.WriteNodesAsString(n) }
  317. func (n LatexFragment) String() string { return orgWriter.WriteNodesAsString(n) }
  318. func (n FootnoteLink) String() string { return orgWriter.WriteNodesAsString(n) }
  319. func (n RegularLink) String() string { return orgWriter.WriteNodesAsString(n) }
  320. func (n Timestamp) String() string { return orgWriter.WriteNodesAsString(n) }