You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

highlight.go 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. // Copyright 2015 The Gogs Authors. All rights reserved.
  2. // Copyright 2020 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package highlight
  5. import (
  6. "bufio"
  7. "bytes"
  8. "fmt"
  9. gohtml "html"
  10. "html/template"
  11. "io"
  12. "path/filepath"
  13. "strings"
  14. "sync"
  15. "code.gitea.io/gitea/modules/analyze"
  16. "code.gitea.io/gitea/modules/log"
  17. "code.gitea.io/gitea/modules/setting"
  18. "code.gitea.io/gitea/modules/util"
  19. "github.com/alecthomas/chroma/v2"
  20. "github.com/alecthomas/chroma/v2/formatters/html"
  21. "github.com/alecthomas/chroma/v2/lexers"
  22. "github.com/alecthomas/chroma/v2/styles"
  23. lru "github.com/hashicorp/golang-lru/v2"
  24. )
  // sizeLimit is the largest input, in bytes, that gets syntax highlighted.
  // Anything longer is emitted as escaped plain text for performance reasons.
  const sizeLimit = 1 << 20
  27. var (
  28. // For custom user mapping
  29. highlightMapping = map[string]string{}
  30. once sync.Once
  31. cache *lru.TwoQueueCache[string, any]
  32. githubStyles = styles.Get("github")
  33. )
  34. // NewContext loads custom highlight map from local config
  35. func NewContext() {
  36. once.Do(func() {
  37. highlightMapping = setting.GetHighlightMapping()
  38. // The size 512 is simply a conservative rule of thumb
  39. c, err := lru.New2Q[string, any](512)
  40. if err != nil {
  41. panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
  42. }
  43. cache = c
  44. })
  45. }
  46. // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
  47. func Code(fileName, language, code string) (output template.HTML, lexerName string) {
  48. NewContext()
  49. // diff view newline will be passed as empty, change to literal '\n' so it can be copied
  50. // preserve literal newline in blame view
  51. if code == "" || code == "\n" {
  52. return "\n", ""
  53. }
  54. if len(code) > sizeLimit {
  55. return template.HTML(template.HTMLEscapeString(code)), ""
  56. }
  57. var lexer chroma.Lexer
  58. if len(language) > 0 {
  59. lexer = lexers.Get(language)
  60. if lexer == nil {
  61. // Attempt stripping off the '?'
  62. if idx := strings.IndexByte(language, '?'); idx > 0 {
  63. lexer = lexers.Get(language[:idx])
  64. }
  65. }
  66. }
  67. if lexer == nil {
  68. if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
  69. // use mapped value to find lexer
  70. lexer = lexers.Get(val)
  71. }
  72. }
  73. if lexer == nil {
  74. if l, ok := cache.Get(fileName); ok {
  75. lexer = l.(chroma.Lexer)
  76. }
  77. }
  78. if lexer == nil {
  79. lexer = lexers.Match(fileName)
  80. if lexer == nil {
  81. lexer = lexers.Fallback
  82. }
  83. cache.Add(fileName, lexer)
  84. }
  85. return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
  86. }
  87. // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
  88. func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
  89. formatter := html.New(html.WithClasses(true),
  90. html.WithLineNumbers(false),
  91. html.PreventSurroundingPre(true),
  92. )
  93. htmlbuf := bytes.Buffer{}
  94. htmlw := bufio.NewWriter(&htmlbuf)
  95. iterator, err := lexer.Tokenise(nil, code)
  96. if err != nil {
  97. log.Error("Can't tokenize code: %v", err)
  98. return template.HTML(template.HTMLEscapeString(code))
  99. }
  100. // style not used for live site but need to pass something
  101. err = formatter.Format(htmlw, githubStyles, iterator)
  102. if err != nil {
  103. log.Error("Can't format code: %v", err)
  104. return template.HTML(template.HTMLEscapeString(code))
  105. }
  106. _ = htmlw.Flush()
  107. // Chroma will add newlines for certain lexers in order to highlight them properly
  108. // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
  109. return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
  110. }
  111. // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
  112. func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
  113. NewContext()
  114. if len(code) > sizeLimit {
  115. return PlainText(code), "", nil
  116. }
  117. formatter := html.New(html.WithClasses(true),
  118. html.WithLineNumbers(false),
  119. html.PreventSurroundingPre(true),
  120. )
  121. var lexer chroma.Lexer
  122. // provided language overrides everything
  123. if language != "" {
  124. lexer = lexers.Get(language)
  125. }
  126. if lexer == nil {
  127. if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
  128. lexer = lexers.Get(val)
  129. }
  130. }
  131. if lexer == nil {
  132. guessLanguage := analyze.GetCodeLanguage(fileName, code)
  133. lexer = lexers.Get(guessLanguage)
  134. if lexer == nil {
  135. lexer = lexers.Match(fileName)
  136. if lexer == nil {
  137. lexer = lexers.Fallback
  138. }
  139. }
  140. }
  141. lexerName := formatLexerName(lexer.Config().Name)
  142. iterator, err := lexer.Tokenise(nil, string(code))
  143. if err != nil {
  144. return nil, "", fmt.Errorf("can't tokenize code: %w", err)
  145. }
  146. tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
  147. htmlBuf := &bytes.Buffer{}
  148. lines := make([]template.HTML, 0, len(tokensLines))
  149. for _, tokens := range tokensLines {
  150. iterator = chroma.Literator(tokens...)
  151. err = formatter.Format(htmlBuf, githubStyles, iterator)
  152. if err != nil {
  153. return nil, "", fmt.Errorf("can't format code: %w", err)
  154. }
  155. lines = append(lines, template.HTML(htmlBuf.String()))
  156. htmlBuf.Reset()
  157. }
  158. return lines, lexerName, nil
  159. }
  160. // PlainText returns non-highlighted HTML for code
  161. func PlainText(code []byte) []template.HTML {
  162. r := bufio.NewReader(bytes.NewReader(code))
  163. m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
  164. for {
  165. content, err := r.ReadString('\n')
  166. if err != nil && err != io.EOF {
  167. log.Error("failed to read string from buffer: %v", err)
  168. break
  169. }
  170. if content == "" && err == io.EOF {
  171. break
  172. }
  173. s := template.HTML(gohtml.EscapeString(content))
  174. m = append(m, s)
  175. }
  176. return m
  177. }
  178. func formatLexerName(name string) string {
  179. if name == "fallback" {
  180. return "Plaintext"
  181. }
  182. return util.ToTitleCaseNoLower(name)
  183. }