You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

markdown.go 24KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. //
  11. // Markdown parsing and processing
  12. //
  13. //
  14. package blackfriday
  15. import (
  16. "bytes"
  17. "fmt"
  18. "strings"
  19. "unicode/utf8"
  20. )
  21. const VERSION = "1.5"
  22. // These are the supported markdown parsing extensions.
  23. // OR these values together to select multiple extensions.
  24. const (
  25. EXTENSION_NO_INTRA_EMPHASIS = 1 << iota // ignore emphasis markers inside words
  26. EXTENSION_TABLES // render tables
  27. EXTENSION_FENCED_CODE // render fenced code blocks
  28. EXTENSION_AUTOLINK // detect embedded URLs that are not explicitly marked
  29. EXTENSION_STRIKETHROUGH // strikethrough text using ~~test~~
  30. EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules
  31. EXTENSION_SPACE_HEADERS // be strict about prefix header rules
  32. EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks
  33. EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four
  34. EXTENSION_FOOTNOTES // Pandoc-style footnotes
  35. EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
  36. EXTENSION_HEADER_IDS // specify header IDs with {#id}
  37. EXTENSION_TITLEBLOCK // Titleblock ala pandoc
  38. EXTENSION_AUTO_HEADER_IDS // Create the header ID from the text
  39. EXTENSION_BACKSLASH_LINE_BREAK // translate trailing backslashes into line breaks
  40. EXTENSION_DEFINITION_LISTS // render definition lists
  41. EXTENSION_JOIN_LINES // delete newline and join lines
  42. commonHtmlFlags = 0 |
  43. HTML_USE_XHTML |
  44. HTML_USE_SMARTYPANTS |
  45. HTML_SMARTYPANTS_FRACTIONS |
  46. HTML_SMARTYPANTS_DASHES |
  47. HTML_SMARTYPANTS_LATEX_DASHES
  48. commonExtensions = 0 |
  49. EXTENSION_NO_INTRA_EMPHASIS |
  50. EXTENSION_TABLES |
  51. EXTENSION_FENCED_CODE |
  52. EXTENSION_AUTOLINK |
  53. EXTENSION_STRIKETHROUGH |
  54. EXTENSION_SPACE_HEADERS |
  55. EXTENSION_HEADER_IDS |
  56. EXTENSION_BACKSLASH_LINE_BREAK |
  57. EXTENSION_DEFINITION_LISTS
  58. )
  59. // These are the possible flag values for the link renderer.
  60. // Only a single one of these values will be used; they are not ORed together.
  61. // These are mostly of interest if you are writing a new output format.
  62. const (
  63. LINK_TYPE_NOT_AUTOLINK = iota
  64. LINK_TYPE_NORMAL
  65. LINK_TYPE_EMAIL
  66. )
  67. // These are the possible flag values for the ListItem renderer.
  68. // Multiple flag values may be ORed together.
  69. // These are mostly of interest if you are writing a new output format.
  70. const (
  71. LIST_TYPE_ORDERED = 1 << iota
  72. LIST_TYPE_DEFINITION
  73. LIST_TYPE_TERM
  74. LIST_ITEM_CONTAINS_BLOCK
  75. LIST_ITEM_BEGINNING_OF_LIST
  76. LIST_ITEM_END_OF_LIST
  77. )
  78. // These are the possible flag values for the table cell renderer.
  79. // Only a single one of these values will be used; they are not ORed together.
  80. // These are mostly of interest if you are writing a new output format.
  81. const (
  82. TABLE_ALIGNMENT_LEFT = 1 << iota
  83. TABLE_ALIGNMENT_RIGHT
  84. TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
  85. )
  86. // The size of a tab stop.
  87. const (
  88. TAB_SIZE_DEFAULT = 4
  89. TAB_SIZE_EIGHT = 8
  90. )
  91. // blockTags is a set of tags that are recognized as HTML block tags.
  92. // Any of these can be included in markdown text without special escaping.
  93. var blockTags = map[string]struct{}{
  94. "blockquote": {},
  95. "del": {},
  96. "div": {},
  97. "dl": {},
  98. "fieldset": {},
  99. "form": {},
  100. "h1": {},
  101. "h2": {},
  102. "h3": {},
  103. "h4": {},
  104. "h5": {},
  105. "h6": {},
  106. "iframe": {},
  107. "ins": {},
  108. "math": {},
  109. "noscript": {},
  110. "ol": {},
  111. "pre": {},
  112. "p": {},
  113. "script": {},
  114. "style": {},
  115. "table": {},
  116. "ul": {},
  117. // HTML5
  118. "address": {},
  119. "article": {},
  120. "aside": {},
  121. "canvas": {},
  122. "figcaption": {},
  123. "figure": {},
  124. "footer": {},
  125. "header": {},
  126. "hgroup": {},
  127. "main": {},
  128. "nav": {},
  129. "output": {},
  130. "progress": {},
  131. "section": {},
  132. "video": {},
  133. }
  134. // Renderer is the rendering interface.
  135. // This is mostly of interest if you are implementing a new rendering format.
  136. //
  137. // When a byte slice is provided, it contains the (rendered) contents of the
  138. // element.
  139. //
  140. // When a callback is provided instead, it will write the contents of the
  141. // respective element directly to the output buffer and return true on success.
  142. // If the callback returns false, the rendering function should reset the
  143. // output buffer as though it had never been called.
  144. //
  145. // Currently Html and Latex implementations are provided
  146. type Renderer interface {
  147. // block-level callbacks
  148. BlockCode(out *bytes.Buffer, text []byte, infoString string)
  149. BlockQuote(out *bytes.Buffer, text []byte)
  150. BlockHtml(out *bytes.Buffer, text []byte)
  151. Header(out *bytes.Buffer, text func() bool, level int, id string)
  152. HRule(out *bytes.Buffer)
  153. List(out *bytes.Buffer, text func() bool, flags int)
  154. ListItem(out *bytes.Buffer, text []byte, flags int)
  155. Paragraph(out *bytes.Buffer, text func() bool)
  156. Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
  157. TableRow(out *bytes.Buffer, text []byte)
  158. TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
  159. TableCell(out *bytes.Buffer, text []byte, flags int)
  160. Footnotes(out *bytes.Buffer, text func() bool)
  161. FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
  162. TitleBlock(out *bytes.Buffer, text []byte)
  163. // Span-level callbacks
  164. AutoLink(out *bytes.Buffer, link []byte, kind int)
  165. CodeSpan(out *bytes.Buffer, text []byte)
  166. DoubleEmphasis(out *bytes.Buffer, text []byte)
  167. Emphasis(out *bytes.Buffer, text []byte)
  168. Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
  169. LineBreak(out *bytes.Buffer)
  170. Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
  171. RawHtmlTag(out *bytes.Buffer, tag []byte)
  172. TripleEmphasis(out *bytes.Buffer, text []byte)
  173. StrikeThrough(out *bytes.Buffer, text []byte)
  174. FootnoteRef(out *bytes.Buffer, ref []byte, id int)
  175. // Low-level callbacks
  176. Entity(out *bytes.Buffer, entity []byte)
  177. NormalText(out *bytes.Buffer, text []byte)
  178. // Header and footer
  179. DocumentHeader(out *bytes.Buffer)
  180. DocumentFooter(out *bytes.Buffer)
  181. GetFlags() int
  182. }
  183. // Callback functions for inline parsing. One such function is defined
  184. // for each character that triggers a response when parsing inline data.
  185. type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int
  186. // Parser holds runtime state used by the parser.
  187. // This is constructed by the Markdown function.
  188. type parser struct {
  189. r Renderer
  190. refOverride ReferenceOverrideFunc
  191. refs map[string]*reference
  192. inlineCallback [256]inlineParser
  193. flags int
  194. nesting int
  195. maxNesting int
  196. insideLink bool
  197. // Footnotes need to be ordered as well as available to quickly check for
  198. // presence. If a ref is also a footnote, it's stored both in refs and here
  199. // in notes. Slice is nil if footnotes not enabled.
  200. notes []*reference
  201. notesRecord map[string]struct{}
  202. }
  203. func (p *parser) getRef(refid string) (ref *reference, found bool) {
  204. if p.refOverride != nil {
  205. r, overridden := p.refOverride(refid)
  206. if overridden {
  207. if r == nil {
  208. return nil, false
  209. }
  210. return &reference{
  211. link: []byte(r.Link),
  212. title: []byte(r.Title),
  213. noteId: 0,
  214. hasBlock: false,
  215. text: []byte(r.Text)}, true
  216. }
  217. }
  218. // refs are case insensitive
  219. ref, found = p.refs[strings.ToLower(refid)]
  220. return ref, found
  221. }
  222. func (p *parser) isFootnote(ref *reference) bool {
  223. _, ok := p.notesRecord[string(ref.link)]
  224. return ok
  225. }
  226. //
  227. //
  228. // Public interface
  229. //
  230. //
  231. // Reference represents the details of a link.
  232. // See the documentation in Options for more details on use-case.
  233. type Reference struct {
  234. // Link is usually the URL the reference points to.
  235. Link string
  236. // Title is the alternate text describing the link in more detail.
  237. Title string
  238. // Text is the optional text to override the ref with if the syntax used was
  239. // [refid][]
  240. Text string
  241. }
  242. // ReferenceOverrideFunc is expected to be called with a reference string and
  243. // return either a valid Reference type that the reference string maps to or
  244. // nil. If overridden is false, the default reference logic will be executed.
  245. // See the documentation in Options for more details on use-case.
  246. type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
  247. // Options represents configurable overrides and callbacks (in addition to the
  248. // extension flag set) for configuring a Markdown parse.
  249. type Options struct {
  250. // Extensions is a flag set of bit-wise ORed extension bits. See the
  251. // EXTENSION_* flags defined in this package.
  252. Extensions int
  253. // ReferenceOverride is an optional function callback that is called every
  254. // time a reference is resolved.
  255. //
  256. // In Markdown, the link reference syntax can be made to resolve a link to
  257. // a reference instead of an inline URL, in one of the following ways:
  258. //
  259. // * [link text][refid]
  260. // * [refid][]
  261. //
  262. // Usually, the refid is defined at the bottom of the Markdown document. If
  263. // this override function is provided, the refid is passed to the override
  264. // function first, before consulting the defined refids at the bottom. If
  265. // the override function indicates an override did not occur, the refids at
  266. // the bottom will be used to fill in the link details.
  267. ReferenceOverride ReferenceOverrideFunc
  268. }
  269. // MarkdownBasic is a convenience function for simple rendering.
  270. // It processes markdown input with no extensions enabled.
  271. func MarkdownBasic(input []byte) []byte {
  272. // set up the HTML renderer
  273. htmlFlags := HTML_USE_XHTML
  274. renderer := HtmlRenderer(htmlFlags, "", "")
  275. // set up the parser
  276. return MarkdownOptions(input, renderer, Options{Extensions: 0})
  277. }
  278. // Call Markdown with most useful extensions enabled
  279. // MarkdownCommon is a convenience function for simple rendering.
  280. // It processes markdown input with common extensions enabled, including:
  281. //
  282. // * Smartypants processing with smart fractions and LaTeX dashes
  283. //
  284. // * Intra-word emphasis suppression
  285. //
  286. // * Tables
  287. //
  288. // * Fenced code blocks
  289. //
  290. // * Autolinking
  291. //
  292. // * Strikethrough support
  293. //
  294. // * Strict header parsing
  295. //
  296. // * Custom Header IDs
  297. func MarkdownCommon(input []byte) []byte {
  298. // set up the HTML renderer
  299. renderer := HtmlRenderer(commonHtmlFlags, "", "")
  300. return MarkdownOptions(input, renderer, Options{
  301. Extensions: commonExtensions})
  302. }
  303. // Markdown is the main rendering function.
  304. // It parses and renders a block of markdown-encoded text.
  305. // The supplied Renderer is used to format the output, and extensions dictates
  306. // which non-standard extensions are enabled.
  307. //
  308. // To use the supplied Html or LaTeX renderers, see HtmlRenderer and
  309. // LatexRenderer, respectively.
  310. func Markdown(input []byte, renderer Renderer, extensions int) []byte {
  311. return MarkdownOptions(input, renderer, Options{
  312. Extensions: extensions})
  313. }
  314. // MarkdownOptions is just like Markdown but takes additional options through
  315. // the Options struct.
  316. func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
  317. // no point in parsing if we can't render
  318. if renderer == nil {
  319. return nil
  320. }
  321. extensions := opts.Extensions
  322. // fill in the render structure
  323. p := new(parser)
  324. p.r = renderer
  325. p.flags = extensions
  326. p.refOverride = opts.ReferenceOverride
  327. p.refs = make(map[string]*reference)
  328. p.maxNesting = 16
  329. p.insideLink = false
  330. // register inline parsers
  331. p.inlineCallback['*'] = emphasis
  332. p.inlineCallback['_'] = emphasis
  333. if extensions&EXTENSION_STRIKETHROUGH != 0 {
  334. p.inlineCallback['~'] = emphasis
  335. }
  336. p.inlineCallback['`'] = codeSpan
  337. p.inlineCallback['\n'] = lineBreak
  338. p.inlineCallback['['] = link
  339. p.inlineCallback['<'] = leftAngle
  340. p.inlineCallback['\\'] = escape
  341. p.inlineCallback['&'] = entity
  342. if extensions&EXTENSION_AUTOLINK != 0 {
  343. p.inlineCallback[':'] = autoLink
  344. }
  345. if extensions&EXTENSION_FOOTNOTES != 0 {
  346. p.notes = make([]*reference, 0)
  347. p.notesRecord = make(map[string]struct{})
  348. }
  349. first := firstPass(p, input)
  350. second := secondPass(p, first)
  351. return second
  352. }
  353. // first pass:
  354. // - normalize newlines
  355. // - extract references (outside of fenced code blocks)
  356. // - expand tabs (outside of fenced code blocks)
  357. // - copy everything else
  358. func firstPass(p *parser, input []byte) []byte {
  359. var out bytes.Buffer
  360. tabSize := TAB_SIZE_DEFAULT
  361. if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
  362. tabSize = TAB_SIZE_EIGHT
  363. }
  364. beg := 0
  365. lastFencedCodeBlockEnd := 0
  366. for beg < len(input) {
  367. // Find end of this line, then process the line.
  368. end := beg
  369. for end < len(input) && input[end] != '\n' && input[end] != '\r' {
  370. end++
  371. }
  372. if p.flags&EXTENSION_FENCED_CODE != 0 {
  373. // track fenced code block boundaries to suppress tab expansion
  374. // and reference extraction inside them:
  375. if beg >= lastFencedCodeBlockEnd {
  376. if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
  377. lastFencedCodeBlockEnd = beg + i
  378. }
  379. }
  380. }
  381. // add the line body if present
  382. if end > beg {
  383. if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
  384. out.Write(input[beg:end])
  385. } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
  386. beg += refEnd
  387. continue
  388. } else {
  389. expandTabs(&out, input[beg:end], tabSize)
  390. }
  391. }
  392. if end < len(input) && input[end] == '\r' {
  393. end++
  394. }
  395. if end < len(input) && input[end] == '\n' {
  396. end++
  397. }
  398. out.WriteByte('\n')
  399. beg = end
  400. }
  401. // empty input?
  402. if out.Len() == 0 {
  403. out.WriteByte('\n')
  404. }
  405. return out.Bytes()
  406. }
  407. // second pass: actual rendering
  408. func secondPass(p *parser, input []byte) []byte {
  409. var output bytes.Buffer
  410. p.r.DocumentHeader(&output)
  411. p.block(&output, input)
  412. if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 {
  413. p.r.Footnotes(&output, func() bool {
  414. flags := LIST_ITEM_BEGINNING_OF_LIST
  415. for i := 0; i < len(p.notes); i += 1 {
  416. ref := p.notes[i]
  417. var buf bytes.Buffer
  418. if ref.hasBlock {
  419. flags |= LIST_ITEM_CONTAINS_BLOCK
  420. p.block(&buf, ref.title)
  421. } else {
  422. p.inline(&buf, ref.title)
  423. }
  424. p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags)
  425. flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK
  426. }
  427. return true
  428. })
  429. }
  430. p.r.DocumentFooter(&output)
  431. if p.nesting != 0 {
  432. panic("Nesting level did not end at zero")
  433. }
  434. return output.Bytes()
  435. }
  436. //
  437. // Link references
  438. //
  439. // This section implements support for references that (usually) appear
  440. // as footnotes in a document, and can be referenced anywhere in the document.
  441. // The basic format is:
  442. //
  443. // [1]: http://www.google.com/ "Google"
  444. // [2]: http://www.github.com/ "Github"
  445. //
  446. // Anywhere in the document, the reference can be linked by referring to its
  447. // label, i.e., 1 and 2 in this example, as in:
  448. //
  449. // This library is hosted on [Github][2], a git hosting site.
  450. //
  451. // Actual footnotes as specified in Pandoc and supported by some other Markdown
  452. // libraries such as php-markdown are also taken care of. They look like this:
  453. //
  454. // This sentence needs a bit of further explanation.[^note]
  455. //
  456. // [^note]: This is the explanation.
  457. //
  458. // Footnotes should be placed at the end of the document in an ordered list.
  459. // Inline footnotes such as:
  460. //
  461. // Inline footnotes^[Not supported.] also exist.
  462. //
  463. // are not yet supported.
  464. // References are parsed and stored in this struct.
  465. type reference struct {
  466. link []byte
  467. title []byte
  468. noteId int // 0 if not a footnote ref
  469. hasBlock bool
  470. text []byte
  471. }
  472. func (r *reference) String() string {
  473. return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
  474. r.link, r.title, r.text, r.noteId, r.hasBlock)
  475. }
  476. // Check whether or not data starts with a reference link.
  477. // If so, it is parsed and stored in the list of references
  478. // (in the render struct).
  479. // Returns the number of bytes to skip to move past it,
  480. // or zero if the first line is not a reference.
  481. func isReference(p *parser, data []byte, tabSize int) int {
  482. // up to 3 optional leading spaces
  483. if len(data) < 4 {
  484. return 0
  485. }
  486. i := 0
  487. for i < 3 && data[i] == ' ' {
  488. i++
  489. }
  490. noteId := 0
  491. // id part: anything but a newline between brackets
  492. if data[i] != '[' {
  493. return 0
  494. }
  495. i++
  496. if p.flags&EXTENSION_FOOTNOTES != 0 {
  497. if i < len(data) && data[i] == '^' {
  498. // we can set it to anything here because the proper noteIds will
  499. // be assigned later during the second pass. It just has to be != 0
  500. noteId = 1
  501. i++
  502. }
  503. }
  504. idOffset := i
  505. for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
  506. i++
  507. }
  508. if i >= len(data) || data[i] != ']' {
  509. return 0
  510. }
  511. idEnd := i
  512. // spacer: colon (space | tab)* newline? (space | tab)*
  513. i++
  514. if i >= len(data) || data[i] != ':' {
  515. return 0
  516. }
  517. i++
  518. for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
  519. i++
  520. }
  521. if i < len(data) && (data[i] == '\n' || data[i] == '\r') {
  522. i++
  523. if i < len(data) && data[i] == '\n' && data[i-1] == '\r' {
  524. i++
  525. }
  526. }
  527. for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
  528. i++
  529. }
  530. if i >= len(data) {
  531. return 0
  532. }
  533. var (
  534. linkOffset, linkEnd int
  535. titleOffset, titleEnd int
  536. lineEnd int
  537. raw []byte
  538. hasBlock bool
  539. )
  540. if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 {
  541. linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
  542. lineEnd = linkEnd
  543. } else {
  544. linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
  545. }
  546. if lineEnd == 0 {
  547. return 0
  548. }
  549. // a valid ref has been found
  550. ref := &reference{
  551. noteId: noteId,
  552. hasBlock: hasBlock,
  553. }
  554. if noteId > 0 {
  555. // reusing the link field for the id since footnotes don't have links
  556. ref.link = data[idOffset:idEnd]
  557. // if footnote, it's not really a title, it's the contained text
  558. ref.title = raw
  559. } else {
  560. ref.link = data[linkOffset:linkEnd]
  561. ref.title = data[titleOffset:titleEnd]
  562. }
  563. // id matches are case-insensitive
  564. id := string(bytes.ToLower(data[idOffset:idEnd]))
  565. p.refs[id] = ref
  566. return lineEnd
  567. }
  568. func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
  569. // link: whitespace-free sequence, optionally between angle brackets
  570. if data[i] == '<' {
  571. i++
  572. }
  573. linkOffset = i
  574. if i == len(data) {
  575. return
  576. }
  577. for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
  578. i++
  579. }
  580. linkEnd = i
  581. if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
  582. linkOffset++
  583. linkEnd--
  584. }
  585. // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' )
  586. for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
  587. i++
  588. }
  589. if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
  590. return
  591. }
  592. // compute end-of-line
  593. if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
  594. lineEnd = i
  595. }
  596. if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' {
  597. lineEnd++
  598. }
  599. // optional (space|tab)* spacer after a newline
  600. if lineEnd > 0 {
  601. i = lineEnd + 1
  602. for i < len(data) && (data[i] == ' ' || data[i] == '\t') {
  603. i++
  604. }
  605. }
  606. // optional title: any non-newline sequence enclosed in '"() alone on its line
  607. if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
  608. i++
  609. titleOffset = i
  610. // look for EOL
  611. for i < len(data) && data[i] != '\n' && data[i] != '\r' {
  612. i++
  613. }
  614. if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' {
  615. titleEnd = i + 1
  616. } else {
  617. titleEnd = i
  618. }
  619. // step back
  620. i--
  621. for i > titleOffset && (data[i] == ' ' || data[i] == '\t') {
  622. i--
  623. }
  624. if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') {
  625. lineEnd = titleEnd
  626. titleEnd = i
  627. }
  628. }
  629. return
  630. }
  631. // The first bit of this logic is the same as (*parser).listItem, but the rest
  632. // is much simpler. This function simply finds the entire block and shifts it
  633. // over by one tab if it is indeed a block (just returns the line if it's not).
  634. // blockEnd is the end of the section in the input buffer, and contents is the
  635. // extracted text that was shifted over one tab. It will need to be rendered at
  636. // the end of the document.
  637. func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
  638. if i == 0 || len(data) == 0 {
  639. return
  640. }
  641. // skip leading whitespace on first line
  642. for i < len(data) && data[i] == ' ' {
  643. i++
  644. }
  645. blockStart = i
  646. // find the end of the line
  647. blockEnd = i
  648. for i < len(data) && data[i-1] != '\n' {
  649. i++
  650. }
  651. // get working buffer
  652. var raw bytes.Buffer
  653. // put the first line into the working buffer
  654. raw.Write(data[blockEnd:i])
  655. blockEnd = i
  656. // process the following lines
  657. containsBlankLine := false
  658. gatherLines:
  659. for blockEnd < len(data) {
  660. i++
  661. // find the end of this line
  662. for i < len(data) && data[i-1] != '\n' {
  663. i++
  664. }
  665. // if it is an empty line, guess that it is part of this item
  666. // and move on to the next line
  667. if p.isEmpty(data[blockEnd:i]) > 0 {
  668. containsBlankLine = true
  669. blockEnd = i
  670. continue
  671. }
  672. n := 0
  673. if n = isIndented(data[blockEnd:i], indentSize); n == 0 {
  674. // this is the end of the block.
  675. // we don't want to include this last line in the index.
  676. break gatherLines
  677. }
  678. // if there were blank lines before this one, insert a new one now
  679. if containsBlankLine {
  680. raw.WriteByte('\n')
  681. containsBlankLine = false
  682. }
  683. // get rid of that first tab, write to buffer
  684. raw.Write(data[blockEnd+n : i])
  685. hasBlock = true
  686. blockEnd = i
  687. }
  688. if data[blockEnd-1] != '\n' {
  689. raw.WriteByte('\n')
  690. }
  691. contents = raw.Bytes()
  692. return
  693. }
  694. //
  695. //
  696. // Miscellaneous helper functions
  697. //
  698. //
  699. // Test if a character is a punctuation symbol.
  700. // Taken from a private function in regexp in the stdlib.
  701. func ispunct(c byte) bool {
  702. for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
  703. if c == r {
  704. return true
  705. }
  706. }
  707. return false
  708. }
  709. // Test if a character is a whitespace character.
  710. func isspace(c byte) bool {
  711. return ishorizontalspace(c) || isverticalspace(c)
  712. }
  713. // Test if a character is a horizontal whitespace character.
  714. func ishorizontalspace(c byte) bool {
  715. return c == ' ' || c == '\t'
  716. }
  717. // Test if a character is a vertical whitespace character.
  718. func isverticalspace(c byte) bool {
  719. return c == '\n' || c == '\r' || c == '\f' || c == '\v'
  720. }
  721. // Test if a character is letter.
  722. func isletter(c byte) bool {
  723. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
  724. }
  725. // Test if a character is a letter or a digit.
  726. // TODO: check when this is looking for ASCII alnum and when it should use unicode
  727. func isalnum(c byte) bool {
  728. return (c >= '0' && c <= '9') || isletter(c)
  729. }
  730. // Replace tab characters with spaces, aligning to the next TAB_SIZE column.
  731. // always ends output with a newline
  732. func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
  733. // first, check for common cases: no tabs, or only tabs at beginning of line
  734. i, prefix := 0, 0
  735. slowcase := false
  736. for i = 0; i < len(line); i++ {
  737. if line[i] == '\t' {
  738. if prefix == i {
  739. prefix++
  740. } else {
  741. slowcase = true
  742. break
  743. }
  744. }
  745. }
  746. // no need to decode runes if all tabs are at the beginning of the line
  747. if !slowcase {
  748. for i = 0; i < prefix*tabSize; i++ {
  749. out.WriteByte(' ')
  750. }
  751. out.Write(line[prefix:])
  752. return
  753. }
  754. // the slow case: we need to count runes to figure out how
  755. // many spaces to insert for each tab
  756. column := 0
  757. i = 0
  758. for i < len(line) {
  759. start := i
  760. for i < len(line) && line[i] != '\t' {
  761. _, size := utf8.DecodeRune(line[i:])
  762. i += size
  763. column++
  764. }
  765. if i > start {
  766. out.Write(line[start:i])
  767. }
  768. if i >= len(line) {
  769. break
  770. }
  771. for {
  772. out.WriteByte(' ')
  773. column++
  774. if column%tabSize == 0 {
  775. break
  776. }
  777. }
  778. i++
  779. }
  780. }
  781. // Find if a line counts as indented or not.
  782. // Returns number of characters the indent is (0 = not indented).
  783. func isIndented(data []byte, indentSize int) int {
  784. if len(data) == 0 {
  785. return 0
  786. }
  787. if data[0] == '\t' {
  788. return 1
  789. }
  790. if len(data) < indentSize {
  791. return 0
  792. }
  793. for i := 0; i < indentSize; i++ {
  794. if data[i] != ' ' {
  795. return 0
  796. }
  797. }
  798. return indentSize
  799. }
  800. // Create a url-safe slug for fragments
  801. func slugify(in []byte) []byte {
  802. if len(in) == 0 {
  803. return in
  804. }
  805. out := make([]byte, 0, len(in))
  806. sym := false
  807. for _, ch := range in {
  808. if isalnum(ch) {
  809. sym = false
  810. out = append(out, ch)
  811. } else if sym {
  812. continue
  813. } else {
  814. out = append(out, '-')
  815. sym = true
  816. }
  817. }
  818. var a, b int
  819. var ch byte
  820. for a, ch = range out {
  821. if ch != '-' {
  822. break
  823. }
  824. }
  825. for b = len(out) - 1; b > 0; b-- {
  826. if out[b] != '-' {
  827. break
  828. }
  829. }
  830. return out[a : b+1]
  831. }