You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

highlighter_simple.go 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package simple
  15. import (
  16. "container/heap"
  17. "fmt"
  18. "github.com/blevesearch/bleve/document"
  19. "github.com/blevesearch/bleve/registry"
  20. "github.com/blevesearch/bleve/search"
  21. "github.com/blevesearch/bleve/search/highlight"
  22. )
  23. const Name = "simple"
  24. const DefaultSeparator = "…"
  25. type Highlighter struct {
  26. fragmenter highlight.Fragmenter
  27. formatter highlight.FragmentFormatter
  28. sep string
  29. }
  30. func NewHighlighter(fragmenter highlight.Fragmenter, formatter highlight.FragmentFormatter, separator string) *Highlighter {
  31. return &Highlighter{
  32. fragmenter: fragmenter,
  33. formatter: formatter,
  34. sep: separator,
  35. }
  36. }
  37. func (s *Highlighter) Fragmenter() highlight.Fragmenter {
  38. return s.fragmenter
  39. }
  40. func (s *Highlighter) SetFragmenter(f highlight.Fragmenter) {
  41. s.fragmenter = f
  42. }
  43. func (s *Highlighter) FragmentFormatter() highlight.FragmentFormatter {
  44. return s.formatter
  45. }
  46. func (s *Highlighter) SetFragmentFormatter(f highlight.FragmentFormatter) {
  47. s.formatter = f
  48. }
  49. func (s *Highlighter) Separator() string {
  50. return s.sep
  51. }
  52. func (s *Highlighter) SetSeparator(sep string) {
  53. s.sep = sep
  54. }
  55. func (s *Highlighter) BestFragmentInField(dm *search.DocumentMatch, doc *document.Document, field string) string {
  56. fragments := s.BestFragmentsInField(dm, doc, field, 1)
  57. if len(fragments) > 0 {
  58. return fragments[0]
  59. }
  60. return ""
  61. }
  62. func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *document.Document, field string, num int) []string {
  63. tlm := dm.Locations[field]
  64. orderedTermLocations := highlight.OrderTermLocations(tlm)
  65. scorer := NewFragmentScorer(tlm)
  66. // score the fragments and put them into a priority queue ordered by score
  67. fq := make(FragmentQueue, 0)
  68. heap.Init(&fq)
  69. for _, f := range doc.Fields {
  70. if f.Name() == field {
  71. _, ok := f.(*document.TextField)
  72. if ok {
  73. termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
  74. for _, otl := range orderedTermLocations {
  75. if otl.ArrayPositions.Equals(f.ArrayPositions()) {
  76. termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
  77. }
  78. }
  79. fieldData := f.Value()
  80. fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
  81. for _, fragment := range fragments {
  82. fragment.ArrayPositions = f.ArrayPositions()
  83. scorer.Score(fragment)
  84. heap.Push(&fq, fragment)
  85. }
  86. }
  87. }
  88. }
  89. // now find the N best non-overlapping fragments
  90. var bestFragments []*highlight.Fragment
  91. if len(fq) > 0 {
  92. candidate := heap.Pop(&fq)
  93. OUTER:
  94. for candidate != nil && len(bestFragments) < num {
  95. // see if this overlaps with any of the best already identified
  96. if len(bestFragments) > 0 {
  97. for _, frag := range bestFragments {
  98. if candidate.(*highlight.Fragment).Overlaps(frag) {
  99. if len(fq) < 1 {
  100. break OUTER
  101. }
  102. candidate = heap.Pop(&fq)
  103. continue OUTER
  104. }
  105. }
  106. bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
  107. } else {
  108. bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
  109. }
  110. if len(fq) < 1 {
  111. break
  112. }
  113. candidate = heap.Pop(&fq)
  114. }
  115. }
  116. // now that we have the best fragments, we can format them
  117. orderedTermLocations.MergeOverlapping()
  118. formattedFragments := make([]string, len(bestFragments))
  119. for i, fragment := range bestFragments {
  120. formattedFragments[i] = ""
  121. if fragment.Start != 0 {
  122. formattedFragments[i] += s.sep
  123. }
  124. formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations)
  125. if fragment.End != len(fragment.Orig) {
  126. formattedFragments[i] += s.sep
  127. }
  128. }
  129. if dm.Fragments == nil {
  130. dm.Fragments = make(search.FieldFragmentMap, 0)
  131. }
  132. if len(formattedFragments) > 0 {
  133. dm.Fragments[field] = formattedFragments
  134. }
  135. return formattedFragments
  136. }
  137. // FragmentQueue implements heap.Interface and holds Items.
  138. type FragmentQueue []*highlight.Fragment
  139. func (fq FragmentQueue) Len() int { return len(fq) }
  140. func (fq FragmentQueue) Less(i, j int) bool {
  141. // We want Pop to give us the highest, not lowest, priority so we use greater-than here.
  142. return fq[i].Score > fq[j].Score
  143. }
  144. func (fq FragmentQueue) Swap(i, j int) {
  145. fq[i], fq[j] = fq[j], fq[i]
  146. fq[i].Index = i
  147. fq[j].Index = j
  148. }
  149. func (fq *FragmentQueue) Push(x interface{}) {
  150. n := len(*fq)
  151. item := x.(*highlight.Fragment)
  152. item.Index = n
  153. *fq = append(*fq, item)
  154. }
  155. func (fq *FragmentQueue) Pop() interface{} {
  156. old := *fq
  157. n := len(old)
  158. item := old[n-1]
  159. item.Index = -1 // for safety
  160. *fq = old[0 : n-1]
  161. return item
  162. }
  163. func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
  164. separator := DefaultSeparator
  165. separatorVal, ok := config["separator"].(string)
  166. if ok {
  167. separator = separatorVal
  168. }
  169. fragmenterName, ok := config["fragmenter"].(string)
  170. if !ok {
  171. return nil, fmt.Errorf("must specify fragmenter")
  172. }
  173. fragmenter, err := cache.FragmenterNamed(fragmenterName)
  174. if err != nil {
  175. return nil, fmt.Errorf("error building fragmenter: %v", err)
  176. }
  177. formatterName, ok := config["formatter"].(string)
  178. if !ok {
  179. return nil, fmt.Errorf("must specify formatter")
  180. }
  181. formatter, err := cache.FragmentFormatterNamed(formatterName)
  182. if err != nil {
  183. return nil, fmt.Errorf("error building fragment formatter: %v", err)
  184. }
  185. return NewHighlighter(fragmenter, formatter, separator), nil
  186. }
  187. func init() {
  188. registry.RegisterHighlighter(Name, Constructor)
  189. }