You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

delegate.go 3.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. package chroma
  2. import (
  3. "bytes"
  4. )
// delegatingLexer is the implementation returned by DelegatingLexer. It lexes
// input with the language lexer first, then re-lexes any "Other" runs with the
// root lexer (see Tokenise).
type delegatingLexer struct {
	root     Lexer // lexer for the surrounding (host) document
	language Lexer // lexer for the embedded language; must emit "Other" for unrecognised text
}
  9. // DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
  10. // inside HTML or PHP inside plain text.
  11. //
  12. // It takes two lexer as arguments: a root lexer and a language lexer. First everything is scanned using the language
  13. // lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
  14. // Finally, these two sets of tokens are merged.
  15. //
  16. // The lexers from the template lexer package use this base lexer.
  17. func DelegatingLexer(root Lexer, language Lexer) Lexer {
  18. return &delegatingLexer{
  19. root: root,
  20. language: language,
  21. }
  22. }
  23. func (d *delegatingLexer) Config() *Config {
  24. return d.language.Config()
  25. }
// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	// start and end are byte offsets into the original text spanning the
	// run of non-"Other" tokens held in tokens. Note: end is only filled in
	// when a subsequent "Other" token is seen (see Tokenise).
	start, end int
	tokens     []Token
}
// Tokenise lexes text with the language lexer first, gathers the runs of
// "Other" tokens, re-lexes those runs with the root lexer, and finally
// interleaves the two token streams back into document order.
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	// "others" accumulates the concatenated Other text; each maximal run of
	// non-Other tokens becomes one insertion recording where (in byte
	// offsets of the original text) those tokens belong.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			// Transition non-Other -> Other closes the current insertion.
			// NOTE(review): a trailing insertion (text ending in non-Other
			// tokens) never gets its end set; it is the last one consumed
			// below, so the stale offset is harmless — confirm if modifying.
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			// Transition Other -> non-Other (or start of stream) opens a
			// new insertion at the current offset.
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}
	// No embedded-language tokens at all: the whole text belongs to the root.
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}
	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}
	// Interleave the two sets of tokens.
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	// nextToken streams rootTokens, returning EOF when exhausted.
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	// nextInsertion streams insertions, returning nil when exhausted.
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
		// If the next insertion starts inside the current root token, split
		// that token around it; otherwise just emit the root token.
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}
  112. func splitToken(t Token, offset int) (l Token, r Token) {
  113. if t == EOF {
  114. return EOF, EOF
  115. }
  116. if offset == 0 {
  117. return EOF, t
  118. }
  119. if offset == len(t.Value) {
  120. return t, EOF
  121. }
  122. l = t.Clone()
  123. r = t.Clone()
  124. l.Value = l.Value[:offset]
  125. r.Value = r.Value[offset:]
  126. return
  127. }