You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

smartypants.go 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. //
  11. // SmartyPants rendering
  12. //
  13. //
  14. package blackfriday
  15. import (
  16. "bytes"
  17. )
  // smartypantsData holds the open/closed state of quotes that the
  // SmartyPants callbacks share through their smrt argument.
  type smartypantsData struct {
  	// inSingleQuote is toggled by smartQuoteHelper for 's' quotes;
  	// inDoubleQuote is toggled for the double-quote variants.
  	inSingleQuote, inDoubleQuote bool
  }
  22. func wordBoundary(c byte) bool {
  23. return c == 0 || isspace(c) || ispunct(c)
  24. }
  // tolower folds an ASCII upper-case letter to lower case and returns
  // every other byte unchanged.
  func tolower(c byte) byte {
  	if c < 'A' || c > 'Z' {
  		return c
  	}
  	return c + ('a' - 'A')
  }
  // isdigit reports whether c is one of the ASCII digits '0' through '9'.
  func isdigit(c byte) bool {
  	if c < '0' || c > '9' {
  		return false
  	}
  	return true
  }
  34. func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
  35. // edge of the buffer is likely to be a tag that we don't get to see,
  36. // so we treat it like text sometimes
  37. // enumerate all sixteen possibilities for (previousChar, nextChar)
  38. // each can be one of {0, space, punct, other}
  39. switch {
  40. case previousChar == 0 && nextChar == 0:
  41. // context is not any help here, so toggle
  42. *isOpen = !*isOpen
  43. case isspace(previousChar) && nextChar == 0:
  44. // [ "] might be [ "<code>foo...]
  45. *isOpen = true
  46. case ispunct(previousChar) && nextChar == 0:
  47. // [!"] hmm... could be [Run!"] or [("<code>...]
  48. *isOpen = false
  49. case /* isnormal(previousChar) && */ nextChar == 0:
  50. // [a"] is probably a close
  51. *isOpen = false
  52. case previousChar == 0 && isspace(nextChar):
  53. // [" ] might be [...foo</code>" ]
  54. *isOpen = false
  55. case isspace(previousChar) && isspace(nextChar):
  56. // [ " ] context is not any help here, so toggle
  57. *isOpen = !*isOpen
  58. case ispunct(previousChar) && isspace(nextChar):
  59. // [!" ] is probably a close
  60. *isOpen = false
  61. case /* isnormal(previousChar) && */ isspace(nextChar):
  62. // [a" ] this is one of the easy cases
  63. *isOpen = false
  64. case previousChar == 0 && ispunct(nextChar):
  65. // ["!] hmm... could be ["$1.95] or [</code>"!...]
  66. *isOpen = false
  67. case isspace(previousChar) && ispunct(nextChar):
  68. // [ "!] looks more like [ "$1.95]
  69. *isOpen = true
  70. case ispunct(previousChar) && ispunct(nextChar):
  71. // [!"!] context is not any help here, so toggle
  72. *isOpen = !*isOpen
  73. case /* isnormal(previousChar) && */ ispunct(nextChar):
  74. // [a"!] is probably a close
  75. *isOpen = false
  76. case previousChar == 0 /* && isnormal(nextChar) */ :
  77. // ["a] is probably an open
  78. *isOpen = true
  79. case isspace(previousChar) /* && isnormal(nextChar) */ :
  80. // [ "a] this is one of the easy cases
  81. *isOpen = true
  82. case ispunct(previousChar) /* && isnormal(nextChar) */ :
  83. // [!"a] is probably an open
  84. *isOpen = true
  85. default:
  86. // [a'b] maybe a contraction?
  87. *isOpen = false
  88. }
  89. // Note that with the limited lookahead, this non-breaking
  90. // space will also be appended to single double quotes.
  91. if addNBSP && !*isOpen {
  92. out.WriteString("&nbsp;")
  93. }
  94. out.WriteByte('&')
  95. if *isOpen {
  96. out.WriteByte('l')
  97. } else {
  98. out.WriteByte('r')
  99. }
  100. out.WriteByte(quote)
  101. out.WriteString("quo;")
  102. if addNBSP && *isOpen {
  103. out.WriteString("&nbsp;")
  104. }
  105. return true
  106. }
  107. func smartSingleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  108. if len(text) >= 2 {
  109. t1 := tolower(text[1])
  110. if t1 == '\'' {
  111. nextChar := byte(0)
  112. if len(text) >= 3 {
  113. nextChar = text[2]
  114. }
  115. if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote, false) {
  116. return 1
  117. }
  118. }
  119. if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
  120. out.WriteString("&rsquo;")
  121. return 0
  122. }
  123. if len(text) >= 3 {
  124. t2 := tolower(text[2])
  125. if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
  126. (len(text) < 4 || wordBoundary(text[3])) {
  127. out.WriteString("&rsquo;")
  128. return 0
  129. }
  130. }
  131. }
  132. nextChar := byte(0)
  133. if len(text) > 1 {
  134. nextChar = text[1]
  135. }
  136. if smartQuoteHelper(out, previousChar, nextChar, 's', &smrt.inSingleQuote, false) {
  137. return 0
  138. }
  139. out.WriteByte(text[0])
  140. return 0
  141. }
  142. func smartParens(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  143. if len(text) >= 3 {
  144. t1 := tolower(text[1])
  145. t2 := tolower(text[2])
  146. if t1 == 'c' && t2 == ')' {
  147. out.WriteString("&copy;")
  148. return 2
  149. }
  150. if t1 == 'r' && t2 == ')' {
  151. out.WriteString("&reg;")
  152. return 2
  153. }
  154. if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
  155. out.WriteString("&trade;")
  156. return 3
  157. }
  158. }
  159. out.WriteByte(text[0])
  160. return 0
  161. }
  162. func smartDash(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  163. if len(text) >= 2 {
  164. if text[1] == '-' {
  165. out.WriteString("&mdash;")
  166. return 1
  167. }
  168. if wordBoundary(previousChar) && wordBoundary(text[1]) {
  169. out.WriteString("&ndash;")
  170. return 0
  171. }
  172. }
  173. out.WriteByte(text[0])
  174. return 0
  175. }
  176. func smartDashLatex(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  177. if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
  178. out.WriteString("&mdash;")
  179. return 2
  180. }
  181. if len(text) >= 2 && text[1] == '-' {
  182. out.WriteString("&ndash;")
  183. return 1
  184. }
  185. out.WriteByte(text[0])
  186. return 0
  187. }
  188. func smartAmpVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte, addNBSP bool) int {
  189. if bytes.HasPrefix(text, []byte("&quot;")) {
  190. nextChar := byte(0)
  191. if len(text) >= 7 {
  192. nextChar = text[6]
  193. }
  194. if smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote, addNBSP) {
  195. return 5
  196. }
  197. }
  198. if bytes.HasPrefix(text, []byte("&#0;")) {
  199. return 3
  200. }
  201. out.WriteByte('&')
  202. return 0
  203. }
  204. func smartAmp(angledQuotes, addNBSP bool) func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  205. var quote byte = 'd'
  206. if angledQuotes {
  207. quote = 'a'
  208. }
  209. return func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  210. return smartAmpVariant(out, smrt, previousChar, text, quote, addNBSP)
  211. }
  212. }
  213. func smartPeriod(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  214. if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
  215. out.WriteString("&hellip;")
  216. return 2
  217. }
  218. if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
  219. out.WriteString("&hellip;")
  220. return 4
  221. }
  222. out.WriteByte(text[0])
  223. return 0
  224. }
  225. func smartBacktick(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  226. if len(text) >= 2 && text[1] == '`' {
  227. nextChar := byte(0)
  228. if len(text) >= 3 {
  229. nextChar = text[2]
  230. }
  231. if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote, false) {
  232. return 1
  233. }
  234. }
  235. out.WriteByte(text[0])
  236. return 0
  237. }
  238. func smartNumberGeneric(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  239. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  240. // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
  241. // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
  242. // and avoid changing dates like 1/23/2005 into fractions.
  243. numEnd := 0
  244. for len(text) > numEnd && isdigit(text[numEnd]) {
  245. numEnd++
  246. }
  247. if numEnd == 0 {
  248. out.WriteByte(text[0])
  249. return 0
  250. }
  251. denStart := numEnd + 1
  252. if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
  253. denStart = numEnd + 3
  254. } else if len(text) < numEnd+2 || text[numEnd] != '/' {
  255. out.WriteByte(text[0])
  256. return 0
  257. }
  258. denEnd := denStart
  259. for len(text) > denEnd && isdigit(text[denEnd]) {
  260. denEnd++
  261. }
  262. if denEnd == denStart {
  263. out.WriteByte(text[0])
  264. return 0
  265. }
  266. if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
  267. out.WriteString("<sup>")
  268. out.Write(text[:numEnd])
  269. out.WriteString("</sup>&frasl;<sub>")
  270. out.Write(text[denStart:denEnd])
  271. out.WriteString("</sub>")
  272. return denEnd - 1
  273. }
  274. }
  275. out.WriteByte(text[0])
  276. return 0
  277. }
  278. func smartNumber(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  279. if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
  280. if text[0] == '1' && text[1] == '/' && text[2] == '2' {
  281. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
  282. out.WriteString("&frac12;")
  283. return 2
  284. }
  285. }
  286. if text[0] == '1' && text[1] == '/' && text[2] == '4' {
  287. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
  288. out.WriteString("&frac14;")
  289. return 2
  290. }
  291. }
  292. if text[0] == '3' && text[1] == '/' && text[2] == '4' {
  293. if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
  294. out.WriteString("&frac34;")
  295. return 2
  296. }
  297. }
  298. }
  299. out.WriteByte(text[0])
  300. return 0
  301. }
  302. func smartDoubleQuoteVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte) int {
  303. nextChar := byte(0)
  304. if len(text) > 1 {
  305. nextChar = text[1]
  306. }
  307. if !smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote, false) {
  308. out.WriteString("&quot;")
  309. }
  310. return 0
  311. }
  312. func smartDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  313. return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'd')
  314. }
  315. func smartAngledDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  316. return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'a')
  317. }
  318. func smartLeftAngle(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
  319. i := 0
  320. for i < len(text) && text[i] != '>' {
  321. i++
  322. }
  323. out.Write(text[:i+1])
  324. return i
  325. }
  326. type smartCallback func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int
  327. type smartypantsRenderer [256]smartCallback
  328. var (
  329. smartAmpAngled = smartAmp(true, false)
  330. smartAmpAngledNBSP = smartAmp(true, true)
  331. smartAmpRegular = smartAmp(false, false)
  332. smartAmpRegularNBSP = smartAmp(false, true)
  333. )
  334. func smartypants(flags int) *smartypantsRenderer {
  335. r := new(smartypantsRenderer)
  336. addNBSP := flags&HTML_SMARTYPANTS_QUOTES_NBSP != 0
  337. if flags&HTML_SMARTYPANTS_ANGLED_QUOTES == 0 {
  338. r['"'] = smartDoubleQuote
  339. if !addNBSP {
  340. r['&'] = smartAmpRegular
  341. } else {
  342. r['&'] = smartAmpRegularNBSP
  343. }
  344. } else {
  345. r['"'] = smartAngledDoubleQuote
  346. if !addNBSP {
  347. r['&'] = smartAmpAngled
  348. } else {
  349. r['&'] = smartAmpAngledNBSP
  350. }
  351. }
  352. r['\''] = smartSingleQuote
  353. r['('] = smartParens
  354. if flags&HTML_SMARTYPANTS_DASHES != 0 {
  355. if flags&HTML_SMARTYPANTS_LATEX_DASHES == 0 {
  356. r['-'] = smartDash
  357. } else {
  358. r['-'] = smartDashLatex
  359. }
  360. }
  361. r['.'] = smartPeriod
  362. if flags&HTML_SMARTYPANTS_FRACTIONS == 0 {
  363. r['1'] = smartNumber
  364. r['3'] = smartNumber
  365. } else {
  366. for ch := '1'; ch <= '9'; ch++ {
  367. r[ch] = smartNumberGeneric
  368. }
  369. }
  370. r['<'] = smartLeftAngle
  371. r['`'] = smartBacktick
  372. return r
  373. }