You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

termvectors.go 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. // Copyright 2012-present Oliver Eilhard. All rights reserved.
  2. // Use of this source code is governed by a MIT-license.
  3. // See http://olivere.mit-license.org/license.txt for details.
  4. package elastic
  5. import (
  6. "context"
  7. "fmt"
  8. "net/http"
  9. "net/url"
  10. "strings"
  11. "github.com/olivere/elastic/v7/uritemplates"
  12. )
  13. // TermvectorsService returns information and statistics on terms in the
  14. // fields of a particular document. The document could be stored in the
  15. // index or artificially provided by the user.
  16. //
  17. // See https://www.elastic.co/guide/en/elasticsearch/reference/7.0/docs-termvectors.html
  18. // for documentation.
  19. type TermvectorsService struct {
  20. client *Client
  21. pretty *bool // pretty format the returned JSON response
  22. human *bool // return human readable values for statistics
  23. errorTrace *bool // include the stack trace of returned errors
  24. filterPath []string // list of filters used to reduce the response
  25. headers http.Header // custom request-level HTTP headers
  26. id string
  27. index string
  28. typ string
  29. dfs *bool
  30. doc interface{}
  31. fieldStatistics *bool
  32. fields []string
  33. filter *TermvectorsFilterSettings
  34. perFieldAnalyzer map[string]string
  35. offsets *bool
  36. parent string
  37. payloads *bool
  38. positions *bool
  39. preference string
  40. realtime *bool
  41. routing string
  42. termStatistics *bool
  43. version interface{}
  44. versionType string
  45. bodyJson interface{}
  46. bodyString string
  47. }
  48. // NewTermvectorsService creates a new TermvectorsService.
  49. func NewTermvectorsService(client *Client) *TermvectorsService {
  50. return &TermvectorsService{
  51. client: client,
  52. }
  53. }
  54. // Pretty tells Elasticsearch whether to return a formatted JSON response.
  55. func (s *TermvectorsService) Pretty(pretty bool) *TermvectorsService {
  56. s.pretty = &pretty
  57. return s
  58. }
  59. // Human specifies whether human readable values should be returned in
  60. // the JSON response, e.g. "7.5mb".
  61. func (s *TermvectorsService) Human(human bool) *TermvectorsService {
  62. s.human = &human
  63. return s
  64. }
  65. // ErrorTrace specifies whether to include the stack trace of returned errors.
  66. func (s *TermvectorsService) ErrorTrace(errorTrace bool) *TermvectorsService {
  67. s.errorTrace = &errorTrace
  68. return s
  69. }
  70. // FilterPath specifies a list of filters used to reduce the response.
  71. func (s *TermvectorsService) FilterPath(filterPath ...string) *TermvectorsService {
  72. s.filterPath = filterPath
  73. return s
  74. }
  75. // Header adds a header to the request.
  76. func (s *TermvectorsService) Header(name string, value string) *TermvectorsService {
  77. if s.headers == nil {
  78. s.headers = http.Header{}
  79. }
  80. s.headers.Add(name, value)
  81. return s
  82. }
  83. // Headers specifies the headers of the request.
  84. func (s *TermvectorsService) Headers(headers http.Header) *TermvectorsService {
  85. s.headers = headers
  86. return s
  87. }
  88. // Index in which the document resides.
  89. func (s *TermvectorsService) Index(index string) *TermvectorsService {
  90. s.index = index
  91. return s
  92. }
  93. // Type of the document.
  94. //
  95. // Deprecated: Types are in the process of being removed.
  96. func (s *TermvectorsService) Type(typ string) *TermvectorsService {
  97. s.typ = typ
  98. return s
  99. }
  100. // Id of the document.
  101. func (s *TermvectorsService) Id(id string) *TermvectorsService {
  102. s.id = id
  103. return s
  104. }
  105. // Dfs specifies if distributed frequencies should be returned instead
  106. // shard frequencies.
  107. func (s *TermvectorsService) Dfs(dfs bool) *TermvectorsService {
  108. s.dfs = &dfs
  109. return s
  110. }
  111. // Doc is the document to analyze.
  112. func (s *TermvectorsService) Doc(doc interface{}) *TermvectorsService {
  113. s.doc = doc
  114. return s
  115. }
  116. // FieldStatistics specifies if document count, sum of document frequencies
  117. // and sum of total term frequencies should be returned.
  118. func (s *TermvectorsService) FieldStatistics(fieldStatistics bool) *TermvectorsService {
  119. s.fieldStatistics = &fieldStatistics
  120. return s
  121. }
  122. // Fields a list of fields to return.
  123. func (s *TermvectorsService) Fields(fields ...string) *TermvectorsService {
  124. if s.fields == nil {
  125. s.fields = make([]string, 0)
  126. }
  127. s.fields = append(s.fields, fields...)
  128. return s
  129. }
  130. // Filter adds terms filter settings.
  131. func (s *TermvectorsService) Filter(filter *TermvectorsFilterSettings) *TermvectorsService {
  132. s.filter = filter
  133. return s
  134. }
  135. // PerFieldAnalyzer allows to specify a different analyzer than the one
  136. // at the field.
  137. func (s *TermvectorsService) PerFieldAnalyzer(perFieldAnalyzer map[string]string) *TermvectorsService {
  138. s.perFieldAnalyzer = perFieldAnalyzer
  139. return s
  140. }
  141. // Offsets specifies if term offsets should be returned.
  142. func (s *TermvectorsService) Offsets(offsets bool) *TermvectorsService {
  143. s.offsets = &offsets
  144. return s
  145. }
  146. // Parent id of documents.
  147. func (s *TermvectorsService) Parent(parent string) *TermvectorsService {
  148. s.parent = parent
  149. return s
  150. }
  151. // Payloads specifies if term payloads should be returned.
  152. func (s *TermvectorsService) Payloads(payloads bool) *TermvectorsService {
  153. s.payloads = &payloads
  154. return s
  155. }
  156. // Positions specifies if term positions should be returned.
  157. func (s *TermvectorsService) Positions(positions bool) *TermvectorsService {
  158. s.positions = &positions
  159. return s
  160. }
  161. // Preference specify the node or shard the operation
  162. // should be performed on (default: random).
  163. func (s *TermvectorsService) Preference(preference string) *TermvectorsService {
  164. s.preference = preference
  165. return s
  166. }
  167. // Realtime specifies if request is real-time as opposed to
  168. // near-real-time (default: true).
  169. func (s *TermvectorsService) Realtime(realtime bool) *TermvectorsService {
  170. s.realtime = &realtime
  171. return s
  172. }
  173. // Routing is a specific routing value.
  174. func (s *TermvectorsService) Routing(routing string) *TermvectorsService {
  175. s.routing = routing
  176. return s
  177. }
  178. // TermStatistics specifies if total term frequency and document frequency
  179. // should be returned.
  180. func (s *TermvectorsService) TermStatistics(termStatistics bool) *TermvectorsService {
  181. s.termStatistics = &termStatistics
  182. return s
  183. }
  184. // Version an explicit version number for concurrency control.
  185. func (s *TermvectorsService) Version(version interface{}) *TermvectorsService {
  186. s.version = version
  187. return s
  188. }
  189. // VersionType specifies a version type ("internal", "external", or "external_gte").
  190. func (s *TermvectorsService) VersionType(versionType string) *TermvectorsService {
  191. s.versionType = versionType
  192. return s
  193. }
  194. // BodyJson defines the body parameters. See documentation.
  195. func (s *TermvectorsService) BodyJson(body interface{}) *TermvectorsService {
  196. s.bodyJson = body
  197. return s
  198. }
  199. // BodyString defines the body parameters as a string. See documentation.
  200. func (s *TermvectorsService) BodyString(body string) *TermvectorsService {
  201. s.bodyString = body
  202. return s
  203. }
  204. // buildURL builds the URL for the operation.
  205. func (s *TermvectorsService) buildURL() (string, url.Values, error) {
  206. var pathParam = map[string]string{
  207. "index": s.index,
  208. }
  209. path := "/{index}"
  210. var err error
  211. if s.typ != "" {
  212. pathParam["type"] = s.typ
  213. path += "/{type}"
  214. } else {
  215. path += "/_termvectors"
  216. }
  217. if s.id != "" {
  218. pathParam["id"] = s.id
  219. path += "/{id}"
  220. }
  221. if s.typ != "" {
  222. path += "/_termvectors"
  223. }
  224. path, err = uritemplates.Expand(path, pathParam)
  225. if err != nil {
  226. return "", url.Values{}, err
  227. }
  228. // Add query string parameters
  229. params := url.Values{}
  230. if v := s.pretty; v != nil {
  231. params.Set("pretty", fmt.Sprint(*v))
  232. }
  233. if v := s.human; v != nil {
  234. params.Set("human", fmt.Sprint(*v))
  235. }
  236. if v := s.errorTrace; v != nil {
  237. params.Set("error_trace", fmt.Sprint(*v))
  238. }
  239. if len(s.filterPath) > 0 {
  240. params.Set("filter_path", strings.Join(s.filterPath, ","))
  241. }
  242. if v := s.dfs; v != nil {
  243. params.Set("dfs", fmt.Sprint(*v))
  244. }
  245. if v := s.fieldStatistics; v != nil {
  246. params.Set("field_statistics", fmt.Sprint(*v))
  247. }
  248. if len(s.fields) > 0 {
  249. params.Set("fields", strings.Join(s.fields, ","))
  250. }
  251. if v := s.offsets; v != nil {
  252. params.Set("offsets", fmt.Sprint(*v))
  253. }
  254. if s.parent != "" {
  255. params.Set("parent", s.parent)
  256. }
  257. if v := s.payloads; v != nil {
  258. params.Set("payloads", fmt.Sprint(*v))
  259. }
  260. if v := s.positions; v != nil {
  261. params.Set("positions", fmt.Sprint(*v))
  262. }
  263. if s.preference != "" {
  264. params.Set("preference", s.preference)
  265. }
  266. if v := s.realtime; v != nil {
  267. params.Set("realtime", fmt.Sprint(*v))
  268. }
  269. if s.routing != "" {
  270. params.Set("routing", s.routing)
  271. }
  272. if v := s.termStatistics; v != nil {
  273. params.Set("term_statistics", fmt.Sprint(*v))
  274. }
  275. if s.version != nil {
  276. params.Set("version", fmt.Sprintf("%v", s.version))
  277. }
  278. if s.versionType != "" {
  279. params.Set("version_type", s.versionType)
  280. }
  281. return path, params, nil
  282. }
  283. // Validate checks if the operation is valid.
  284. func (s *TermvectorsService) Validate() error {
  285. var invalid []string
  286. if s.index == "" {
  287. invalid = append(invalid, "Index")
  288. }
  289. if len(invalid) > 0 {
  290. return fmt.Errorf("missing required fields: %v", invalid)
  291. }
  292. return nil
  293. }
  294. // Do executes the operation.
  295. func (s *TermvectorsService) Do(ctx context.Context) (*TermvectorsResponse, error) {
  296. // Check pre-conditions
  297. if err := s.Validate(); err != nil {
  298. return nil, err
  299. }
  300. // Get URL for request
  301. path, params, err := s.buildURL()
  302. if err != nil {
  303. return nil, err
  304. }
  305. // Setup HTTP request body
  306. var body interface{}
  307. if s.bodyJson != nil {
  308. body = s.bodyJson
  309. } else if s.bodyString != "" {
  310. body = s.bodyString
  311. } else {
  312. data := make(map[string]interface{})
  313. if s.doc != nil {
  314. data["doc"] = s.doc
  315. }
  316. if len(s.perFieldAnalyzer) > 0 {
  317. data["per_field_analyzer"] = s.perFieldAnalyzer
  318. }
  319. if s.filter != nil {
  320. src, err := s.filter.Source()
  321. if err != nil {
  322. return nil, err
  323. }
  324. data["filter"] = src
  325. }
  326. if len(data) > 0 {
  327. body = data
  328. }
  329. }
  330. // Get HTTP response
  331. res, err := s.client.PerformRequest(ctx, PerformRequestOptions{
  332. Method: "GET",
  333. Path: path,
  334. Params: params,
  335. Body: body,
  336. Headers: s.headers,
  337. })
  338. if err != nil {
  339. return nil, err
  340. }
  341. // Return operation response
  342. ret := new(TermvectorsResponse)
  343. if err := s.client.decoder.Decode(res.Body, ret); err != nil {
  344. return nil, err
  345. }
  346. return ret, nil
  347. }
  348. // -- Filter settings --
  349. // TermvectorsFilterSettings adds additional filters to a Termsvector request.
  350. // It allows to filter terms based on their tf-idf scores.
  351. // See https://www.elastic.co/guide/en/elasticsearch/reference/7.0/docs-termvectors.html#_terms_filtering
  352. // for more information.
  353. type TermvectorsFilterSettings struct {
  354. maxNumTerms *int64
  355. minTermFreq *int64
  356. maxTermFreq *int64
  357. minDocFreq *int64
  358. maxDocFreq *int64
  359. minWordLength *int64
  360. maxWordLength *int64
  361. }
  362. // NewTermvectorsFilterSettings creates and initializes a new TermvectorsFilterSettings struct.
  363. func NewTermvectorsFilterSettings() *TermvectorsFilterSettings {
  364. return &TermvectorsFilterSettings{}
  365. }
  366. // MaxNumTerms specifies the maximum number of terms the must be returned per field.
  367. func (fs *TermvectorsFilterSettings) MaxNumTerms(value int64) *TermvectorsFilterSettings {
  368. fs.maxNumTerms = &value
  369. return fs
  370. }
  371. // MinTermFreq ignores words with less than this frequency in the source doc.
  372. func (fs *TermvectorsFilterSettings) MinTermFreq(value int64) *TermvectorsFilterSettings {
  373. fs.minTermFreq = &value
  374. return fs
  375. }
  376. // MaxTermFreq ignores words with more than this frequency in the source doc.
  377. func (fs *TermvectorsFilterSettings) MaxTermFreq(value int64) *TermvectorsFilterSettings {
  378. fs.maxTermFreq = &value
  379. return fs
  380. }
  381. // MinDocFreq ignores terms which do not occur in at least this many docs.
  382. func (fs *TermvectorsFilterSettings) MinDocFreq(value int64) *TermvectorsFilterSettings {
  383. fs.minDocFreq = &value
  384. return fs
  385. }
  386. // MaxDocFreq ignores terms which occur in more than this many docs.
  387. func (fs *TermvectorsFilterSettings) MaxDocFreq(value int64) *TermvectorsFilterSettings {
  388. fs.maxDocFreq = &value
  389. return fs
  390. }
  391. // MinWordLength specifies the minimum word length below which words will be ignored.
  392. func (fs *TermvectorsFilterSettings) MinWordLength(value int64) *TermvectorsFilterSettings {
  393. fs.minWordLength = &value
  394. return fs
  395. }
  396. // MaxWordLength specifies the maximum word length above which words will be ignored.
  397. func (fs *TermvectorsFilterSettings) MaxWordLength(value int64) *TermvectorsFilterSettings {
  398. fs.maxWordLength = &value
  399. return fs
  400. }
  401. // Source returns JSON for the query.
  402. func (fs *TermvectorsFilterSettings) Source() (interface{}, error) {
  403. source := make(map[string]interface{})
  404. if fs.maxNumTerms != nil {
  405. source["max_num_terms"] = *fs.maxNumTerms
  406. }
  407. if fs.minTermFreq != nil {
  408. source["min_term_freq"] = *fs.minTermFreq
  409. }
  410. if fs.maxTermFreq != nil {
  411. source["max_term_freq"] = *fs.maxTermFreq
  412. }
  413. if fs.minDocFreq != nil {
  414. source["min_doc_freq"] = *fs.minDocFreq
  415. }
  416. if fs.maxDocFreq != nil {
  417. source["max_doc_freq"] = *fs.maxDocFreq
  418. }
  419. if fs.minWordLength != nil {
  420. source["min_word_length"] = *fs.minWordLength
  421. }
  422. if fs.maxWordLength != nil {
  423. source["max_word_length"] = *fs.maxWordLength
  424. }
  425. return source, nil
  426. }
  427. // -- Response types --
  428. type TokenInfo struct {
  429. StartOffset int64 `json:"start_offset"`
  430. EndOffset int64 `json:"end_offset"`
  431. Position int64 `json:"position"`
  432. Payload string `json:"payload"`
  433. }
  434. type TermsInfo struct {
  435. DocFreq int64 `json:"doc_freq"`
  436. Score float64 `json:"score"`
  437. TermFreq int64 `json:"term_freq"`
  438. Ttf int64 `json:"ttf"`
  439. Tokens []TokenInfo `json:"tokens"`
  440. }
  441. type FieldStatistics struct {
  442. DocCount int64 `json:"doc_count"`
  443. SumDocFreq int64 `json:"sum_doc_freq"`
  444. SumTtf int64 `json:"sum_ttf"`
  445. }
  446. type TermVectorsFieldInfo struct {
  447. FieldStatistics FieldStatistics `json:"field_statistics"`
  448. Terms map[string]TermsInfo `json:"terms"`
  449. }
  450. // TermvectorsResponse is the response of TermvectorsService.Do.
  451. type TermvectorsResponse struct {
  452. Index string `json:"_index"`
  453. Type string `json:"_type"`
  454. Id string `json:"_id,omitempty"`
  455. Version int `json:"_version"`
  456. Found bool `json:"found"`
  457. Took int64 `json:"took"`
  458. TermVectors map[string]TermVectorsFieldInfo `json:"term_vectors"`
  459. }