You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

highlight.go 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. // Copyright 2015 The Gogs Authors. All rights reserved.
  2. // Copyright 2020 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package highlight
  5. import (
  6. "bufio"
  7. "bytes"
  8. "fmt"
  9. gohtml "html"
  10. "io"
  11. "path/filepath"
  12. "strings"
  13. "sync"
  14. "code.gitea.io/gitea/modules/analyze"
  15. "code.gitea.io/gitea/modules/log"
  16. "code.gitea.io/gitea/modules/setting"
  17. "code.gitea.io/gitea/modules/util"
  18. "github.com/alecthomas/chroma/v2"
  19. "github.com/alecthomas/chroma/v2/formatters/html"
  20. "github.com/alecthomas/chroma/v2/lexers"
  21. "github.com/alecthomas/chroma/v2/styles"
  22. lru "github.com/hashicorp/golang-lru"
  23. )
  24. // don't index files larger than this many bytes for performance purposes
  25. const sizeLimit = 1024 * 1024
  26. var (
  27. // For custom user mapping
  28. highlightMapping = map[string]string{}
  29. once sync.Once
  30. cache *lru.TwoQueueCache
  31. githubStyles = styles.Get("github")
  32. )
  33. // NewContext loads custom highlight map from local config
  34. func NewContext() {
  35. once.Do(func() {
  36. highlightMapping = setting.GetHighlightMapping()
  37. // The size 512 is simply a conservative rule of thumb
  38. c, err := lru.New2Q(512)
  39. if err != nil {
  40. panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
  41. }
  42. cache = c
  43. })
  44. }
  45. // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
  46. func Code(fileName, language, code string) (string, string) {
  47. NewContext()
  48. // diff view newline will be passed as empty, change to literal '\n' so it can be copied
  49. // preserve literal newline in blame view
  50. if code == "" || code == "\n" {
  51. return "\n", ""
  52. }
  53. if len(code) > sizeLimit {
  54. return code, ""
  55. }
  56. var lexer chroma.Lexer
  57. if len(language) > 0 {
  58. lexer = lexers.Get(language)
  59. if lexer == nil {
  60. // Attempt stripping off the '?'
  61. if idx := strings.IndexByte(language, '?'); idx > 0 {
  62. lexer = lexers.Get(language[:idx])
  63. }
  64. }
  65. }
  66. if lexer == nil {
  67. if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
  68. // use mapped value to find lexer
  69. lexer = lexers.Get(val)
  70. }
  71. }
  72. if lexer == nil {
  73. if l, ok := cache.Get(fileName); ok {
  74. lexer = l.(chroma.Lexer)
  75. }
  76. }
  77. if lexer == nil {
  78. lexer = lexers.Match(fileName)
  79. if lexer == nil {
  80. lexer = lexers.Fallback
  81. }
  82. cache.Add(fileName, lexer)
  83. }
  84. lexerName := formatLexerName(lexer.Config().Name)
  85. return CodeFromLexer(lexer, code), lexerName
  86. }
  87. // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
  88. func CodeFromLexer(lexer chroma.Lexer, code string) string {
  89. formatter := html.New(html.WithClasses(true),
  90. html.WithLineNumbers(false),
  91. html.PreventSurroundingPre(true),
  92. )
  93. htmlbuf := bytes.Buffer{}
  94. htmlw := bufio.NewWriter(&htmlbuf)
  95. iterator, err := lexer.Tokenise(nil, code)
  96. if err != nil {
  97. log.Error("Can't tokenize code: %v", err)
  98. return code
  99. }
  100. // style not used for live site but need to pass something
  101. err = formatter.Format(htmlw, githubStyles, iterator)
  102. if err != nil {
  103. log.Error("Can't format code: %v", err)
  104. return code
  105. }
  106. _ = htmlw.Flush()
  107. // Chroma will add newlines for certain lexers in order to highlight them properly
  108. // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
  109. return strings.TrimSuffix(htmlbuf.String(), "\n")
  110. }
  111. // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
  112. func File(fileName, language string, code []byte) ([]string, string, error) {
  113. NewContext()
  114. if len(code) > sizeLimit {
  115. return PlainText(code), "", nil
  116. }
  117. formatter := html.New(html.WithClasses(true),
  118. html.WithLineNumbers(false),
  119. html.PreventSurroundingPre(true),
  120. )
  121. var lexer chroma.Lexer
  122. // provided language overrides everything
  123. if language != "" {
  124. lexer = lexers.Get(language)
  125. }
  126. if lexer == nil {
  127. if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
  128. lexer = lexers.Get(val)
  129. }
  130. }
  131. if lexer == nil {
  132. guessLanguage := analyze.GetCodeLanguage(fileName, code)
  133. lexer = lexers.Get(guessLanguage)
  134. if lexer == nil {
  135. lexer = lexers.Match(fileName)
  136. if lexer == nil {
  137. lexer = lexers.Fallback
  138. }
  139. }
  140. }
  141. lexerName := formatLexerName(lexer.Config().Name)
  142. iterator, err := lexer.Tokenise(nil, string(code))
  143. if err != nil {
  144. return nil, "", fmt.Errorf("can't tokenize code: %w", err)
  145. }
  146. tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
  147. htmlBuf := &bytes.Buffer{}
  148. lines := make([]string, 0, len(tokensLines))
  149. for _, tokens := range tokensLines {
  150. iterator = chroma.Literator(tokens...)
  151. err = formatter.Format(htmlBuf, githubStyles, iterator)
  152. if err != nil {
  153. return nil, "", fmt.Errorf("can't format code: %w", err)
  154. }
  155. lines = append(lines, htmlBuf.String())
  156. htmlBuf.Reset()
  157. }
  158. return lines, lexerName, nil
  159. }
  160. // PlainText returns non-highlighted HTML for code
  161. func PlainText(code []byte) []string {
  162. r := bufio.NewReader(bytes.NewReader(code))
  163. m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
  164. for {
  165. content, err := r.ReadString('\n')
  166. if err != nil && err != io.EOF {
  167. log.Error("failed to read string from buffer: %v", err)
  168. break
  169. }
  170. if content == "" && err == io.EOF {
  171. break
  172. }
  173. s := gohtml.EscapeString(content)
  174. m = append(m, s)
  175. }
  176. return m
  177. }
  178. func formatLexerName(name string) string {
  179. if name == "fallback" {
  180. return "Plaintext"
  181. }
  182. return util.ToTitleCaseNoLower(name)
  183. }