123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520 |
- // Copyright 2012-present Oliver Eilhard. All rights reserved.
- // Use of this source code is governed by a MIT-license.
- // See http://olivere.mit-license.org/license.txt for details.
-
- package elastic
-
- import (
- "context"
- "fmt"
- "net/http"
- "net/url"
- "strings"
-
- "github.com/olivere/elastic/v7/uritemplates"
- )
-
- // TermvectorsService returns information and statistics on terms in the
- // fields of a particular document. The document could be stored in the
- // index or artificially provided by the user.
- //
- // See https://www.elastic.co/guide/en/elasticsearch/reference/7.0/docs-termvectors.html
- // for documentation.
- type TermvectorsService struct {
- client *Client
-
- pretty *bool // pretty format the returned JSON response
- human *bool // return human readable values for statistics
- errorTrace *bool // include the stack trace of returned errors
- filterPath []string // list of filters used to reduce the response
- headers http.Header // custom request-level HTTP headers
-
- id string
- index string
- typ string
- dfs *bool
- doc interface{}
- fieldStatistics *bool
- fields []string
- filter *TermvectorsFilterSettings
- perFieldAnalyzer map[string]string
- offsets *bool
- parent string
- payloads *bool
- positions *bool
- preference string
- realtime *bool
- routing string
- termStatistics *bool
- version interface{}
- versionType string
- bodyJson interface{}
- bodyString string
- }
-
- // NewTermvectorsService creates a new TermvectorsService.
- func NewTermvectorsService(client *Client) *TermvectorsService {
- return &TermvectorsService{
- client: client,
- }
- }
-
- // Pretty tells Elasticsearch whether to return a formatted JSON response.
- func (s *TermvectorsService) Pretty(pretty bool) *TermvectorsService {
- s.pretty = &pretty
- return s
- }
-
- // Human specifies whether human readable values should be returned in
- // the JSON response, e.g. "7.5mb".
- func (s *TermvectorsService) Human(human bool) *TermvectorsService {
- s.human = &human
- return s
- }
-
- // ErrorTrace specifies whether to include the stack trace of returned errors.
- func (s *TermvectorsService) ErrorTrace(errorTrace bool) *TermvectorsService {
- s.errorTrace = &errorTrace
- return s
- }
-
- // FilterPath specifies a list of filters used to reduce the response.
- func (s *TermvectorsService) FilterPath(filterPath ...string) *TermvectorsService {
- s.filterPath = filterPath
- return s
- }
-
- // Header adds a header to the request.
- func (s *TermvectorsService) Header(name string, value string) *TermvectorsService {
- if s.headers == nil {
- s.headers = http.Header{}
- }
- s.headers.Add(name, value)
- return s
- }
-
- // Headers specifies the headers of the request.
- func (s *TermvectorsService) Headers(headers http.Header) *TermvectorsService {
- s.headers = headers
- return s
- }
-
- // Index in which the document resides.
- func (s *TermvectorsService) Index(index string) *TermvectorsService {
- s.index = index
- return s
- }
-
- // Type of the document.
- //
- // Deprecated: Types are in the process of being removed.
- func (s *TermvectorsService) Type(typ string) *TermvectorsService {
- s.typ = typ
- return s
- }
-
- // Id of the document.
- func (s *TermvectorsService) Id(id string) *TermvectorsService {
- s.id = id
- return s
- }
-
- // Dfs specifies if distributed frequencies should be returned instead
- // shard frequencies.
- func (s *TermvectorsService) Dfs(dfs bool) *TermvectorsService {
- s.dfs = &dfs
- return s
- }
-
- // Doc is the document to analyze.
- func (s *TermvectorsService) Doc(doc interface{}) *TermvectorsService {
- s.doc = doc
- return s
- }
-
- // FieldStatistics specifies if document count, sum of document frequencies
- // and sum of total term frequencies should be returned.
- func (s *TermvectorsService) FieldStatistics(fieldStatistics bool) *TermvectorsService {
- s.fieldStatistics = &fieldStatistics
- return s
- }
-
- // Fields a list of fields to return.
- func (s *TermvectorsService) Fields(fields ...string) *TermvectorsService {
- if s.fields == nil {
- s.fields = make([]string, 0)
- }
- s.fields = append(s.fields, fields...)
- return s
- }
-
- // Filter adds terms filter settings.
- func (s *TermvectorsService) Filter(filter *TermvectorsFilterSettings) *TermvectorsService {
- s.filter = filter
- return s
- }
-
- // PerFieldAnalyzer allows to specify a different analyzer than the one
- // at the field.
- func (s *TermvectorsService) PerFieldAnalyzer(perFieldAnalyzer map[string]string) *TermvectorsService {
- s.perFieldAnalyzer = perFieldAnalyzer
- return s
- }
-
- // Offsets specifies if term offsets should be returned.
- func (s *TermvectorsService) Offsets(offsets bool) *TermvectorsService {
- s.offsets = &offsets
- return s
- }
-
- // Parent id of documents.
- func (s *TermvectorsService) Parent(parent string) *TermvectorsService {
- s.parent = parent
- return s
- }
-
- // Payloads specifies if term payloads should be returned.
- func (s *TermvectorsService) Payloads(payloads bool) *TermvectorsService {
- s.payloads = &payloads
- return s
- }
-
- // Positions specifies if term positions should be returned.
- func (s *TermvectorsService) Positions(positions bool) *TermvectorsService {
- s.positions = &positions
- return s
- }
-
- // Preference specify the node or shard the operation
- // should be performed on (default: random).
- func (s *TermvectorsService) Preference(preference string) *TermvectorsService {
- s.preference = preference
- return s
- }
-
- // Realtime specifies if request is real-time as opposed to
- // near-real-time (default: true).
- func (s *TermvectorsService) Realtime(realtime bool) *TermvectorsService {
- s.realtime = &realtime
- return s
- }
-
- // Routing is a specific routing value.
- func (s *TermvectorsService) Routing(routing string) *TermvectorsService {
- s.routing = routing
- return s
- }
-
- // TermStatistics specifies if total term frequency and document frequency
- // should be returned.
- func (s *TermvectorsService) TermStatistics(termStatistics bool) *TermvectorsService {
- s.termStatistics = &termStatistics
- return s
- }
-
- // Version an explicit version number for concurrency control.
- func (s *TermvectorsService) Version(version interface{}) *TermvectorsService {
- s.version = version
- return s
- }
-
- // VersionType specifies a version type ("internal", "external", or "external_gte").
- func (s *TermvectorsService) VersionType(versionType string) *TermvectorsService {
- s.versionType = versionType
- return s
- }
-
- // BodyJson defines the body parameters. See documentation.
- func (s *TermvectorsService) BodyJson(body interface{}) *TermvectorsService {
- s.bodyJson = body
- return s
- }
-
- // BodyString defines the body parameters as a string. See documentation.
- func (s *TermvectorsService) BodyString(body string) *TermvectorsService {
- s.bodyString = body
- return s
- }
-
- // buildURL builds the URL for the operation.
- func (s *TermvectorsService) buildURL() (string, url.Values, error) {
- var pathParam = map[string]string{
- "index": s.index,
- }
- path := "/{index}"
- var err error
-
- if s.typ != "" {
- pathParam["type"] = s.typ
- path += "/{type}"
- } else {
- path += "/_termvectors"
- }
- if s.id != "" {
- pathParam["id"] = s.id
- path += "/{id}"
- }
- if s.typ != "" {
- path += "/_termvectors"
- }
-
- path, err = uritemplates.Expand(path, pathParam)
- if err != nil {
- return "", url.Values{}, err
- }
-
- // Add query string parameters
- params := url.Values{}
- if v := s.pretty; v != nil {
- params.Set("pretty", fmt.Sprint(*v))
- }
- if v := s.human; v != nil {
- params.Set("human", fmt.Sprint(*v))
- }
- if v := s.errorTrace; v != nil {
- params.Set("error_trace", fmt.Sprint(*v))
- }
- if len(s.filterPath) > 0 {
- params.Set("filter_path", strings.Join(s.filterPath, ","))
- }
- if v := s.dfs; v != nil {
- params.Set("dfs", fmt.Sprint(*v))
- }
- if v := s.fieldStatistics; v != nil {
- params.Set("field_statistics", fmt.Sprint(*v))
- }
- if len(s.fields) > 0 {
- params.Set("fields", strings.Join(s.fields, ","))
- }
- if v := s.offsets; v != nil {
- params.Set("offsets", fmt.Sprint(*v))
- }
- if s.parent != "" {
- params.Set("parent", s.parent)
- }
- if v := s.payloads; v != nil {
- params.Set("payloads", fmt.Sprint(*v))
- }
- if v := s.positions; v != nil {
- params.Set("positions", fmt.Sprint(*v))
- }
- if s.preference != "" {
- params.Set("preference", s.preference)
- }
- if v := s.realtime; v != nil {
- params.Set("realtime", fmt.Sprint(*v))
- }
- if s.routing != "" {
- params.Set("routing", s.routing)
- }
- if v := s.termStatistics; v != nil {
- params.Set("term_statistics", fmt.Sprint(*v))
- }
- if s.version != nil {
- params.Set("version", fmt.Sprintf("%v", s.version))
- }
- if s.versionType != "" {
- params.Set("version_type", s.versionType)
- }
- return path, params, nil
- }
-
- // Validate checks if the operation is valid.
- func (s *TermvectorsService) Validate() error {
- var invalid []string
- if s.index == "" {
- invalid = append(invalid, "Index")
- }
- if len(invalid) > 0 {
- return fmt.Errorf("missing required fields: %v", invalid)
- }
- return nil
- }
-
- // Do executes the operation.
- func (s *TermvectorsService) Do(ctx context.Context) (*TermvectorsResponse, error) {
- // Check pre-conditions
- if err := s.Validate(); err != nil {
- return nil, err
- }
-
- // Get URL for request
- path, params, err := s.buildURL()
- if err != nil {
- return nil, err
- }
-
- // Setup HTTP request body
- var body interface{}
- if s.bodyJson != nil {
- body = s.bodyJson
- } else if s.bodyString != "" {
- body = s.bodyString
- } else {
- data := make(map[string]interface{})
- if s.doc != nil {
- data["doc"] = s.doc
- }
- if len(s.perFieldAnalyzer) > 0 {
- data["per_field_analyzer"] = s.perFieldAnalyzer
- }
- if s.filter != nil {
- src, err := s.filter.Source()
- if err != nil {
- return nil, err
- }
- data["filter"] = src
- }
- if len(data) > 0 {
- body = data
- }
- }
-
- // Get HTTP response
- res, err := s.client.PerformRequest(ctx, PerformRequestOptions{
- Method: "GET",
- Path: path,
- Params: params,
- Body: body,
- Headers: s.headers,
- })
- if err != nil {
- return nil, err
- }
-
- // Return operation response
- ret := new(TermvectorsResponse)
- if err := s.client.decoder.Decode(res.Body, ret); err != nil {
- return nil, err
- }
- return ret, nil
- }
-
- // -- Filter settings --
-
// TermvectorsFilterSettings holds additional filters for a term vectors
// request. It allows filtering terms based on their tf-idf scores.
// See https://www.elastic.co/guide/en/elasticsearch/reference/7.0/docs-termvectors.html#_terms_filtering
// for more information.
//
// Every setting is optional; settings that were never set are omitted from
// the serialized form produced by Source.
type TermvectorsFilterSettings struct {
	maxNumTerms   *int64
	minTermFreq   *int64
	maxTermFreq   *int64
	minDocFreq    *int64
	maxDocFreq    *int64
	minWordLength *int64
	maxWordLength *int64
}

// NewTermvectorsFilterSettings returns an empty TermvectorsFilterSettings
// with no setting specified.
func NewTermvectorsFilterSettings() *TermvectorsFilterSettings {
	return new(TermvectorsFilterSettings)
}

// MaxNumTerms specifies the maximum number of terms that must be returned
// per field.
func (fs *TermvectorsFilterSettings) MaxNumTerms(value int64) *TermvectorsFilterSettings {
	fs.maxNumTerms = new(int64)
	*fs.maxNumTerms = value
	return fs
}

// MinTermFreq ignores words with less than this frequency in the source doc.
func (fs *TermvectorsFilterSettings) MinTermFreq(value int64) *TermvectorsFilterSettings {
	fs.minTermFreq = new(int64)
	*fs.minTermFreq = value
	return fs
}

// MaxTermFreq ignores words with more than this frequency in the source doc.
func (fs *TermvectorsFilterSettings) MaxTermFreq(value int64) *TermvectorsFilterSettings {
	fs.maxTermFreq = new(int64)
	*fs.maxTermFreq = value
	return fs
}

// MinDocFreq ignores terms which do not occur in at least this many docs.
func (fs *TermvectorsFilterSettings) MinDocFreq(value int64) *TermvectorsFilterSettings {
	fs.minDocFreq = new(int64)
	*fs.minDocFreq = value
	return fs
}

// MaxDocFreq ignores terms which occur in more than this many docs.
func (fs *TermvectorsFilterSettings) MaxDocFreq(value int64) *TermvectorsFilterSettings {
	fs.maxDocFreq = new(int64)
	*fs.maxDocFreq = value
	return fs
}

// MinWordLength specifies the minimum word length below which words will be
// ignored.
func (fs *TermvectorsFilterSettings) MinWordLength(value int64) *TermvectorsFilterSettings {
	fs.minWordLength = new(int64)
	*fs.minWordLength = value
	return fs
}

// MaxWordLength specifies the maximum word length above which words will be
// ignored.
func (fs *TermvectorsFilterSettings) MaxWordLength(value int64) *TermvectorsFilterSettings {
	fs.maxWordLength = new(int64)
	*fs.maxWordLength = value
	return fs
}

// Source returns the filter settings as a map[string]interface{} ready for
// JSON serialization. Only settings that have been specified appear in the
// map, each as an int64 value. The returned error is always nil.
func (fs *TermvectorsFilterSettings) Source() (interface{}, error) {
	settings := []struct {
		key   string
		value *int64
	}{
		{"max_num_terms", fs.maxNumTerms},
		{"min_term_freq", fs.minTermFreq},
		{"max_term_freq", fs.maxTermFreq},
		{"min_doc_freq", fs.minDocFreq},
		{"max_doc_freq", fs.maxDocFreq},
		{"min_word_length", fs.minWordLength},
		{"max_word_length", fs.maxWordLength},
	}

	source := make(map[string]interface{})
	for _, setting := range settings {
		if setting.value != nil {
			source[setting.key] = *setting.value
		}
	}
	return source, nil
}
-
- // -- Response types --
-
// TokenInfo holds the values decoded from one entry of a term's "tokens"
// list: the start and end offsets, the position and the payload.
type TokenInfo struct {
	StartOffset int64  `json:"start_offset"`
	EndOffset   int64  `json:"end_offset"`
	Position    int64  `json:"position"`
	Payload     string `json:"payload"`
}
-
- type TermsInfo struct {
- DocFreq int64 `json:"doc_freq"`
- Score float64 `json:"score"`
- TermFreq int64 `json:"term_freq"`
- Ttf int64 `json:"ttf"`
- Tokens []TokenInfo `json:"tokens"`
- }
-
// FieldStatistics holds the per-field statistics decoded from the response:
// the document count ("doc_count"), the sum of document frequencies
// ("sum_doc_freq") and the sum of total term frequencies ("sum_ttf").
type FieldStatistics struct {
	DocCount   int64 `json:"doc_count"`
	SumDocFreq int64 `json:"sum_doc_freq"`
	SumTtf     int64 `json:"sum_ttf"`
}
-
- type TermVectorsFieldInfo struct {
- FieldStatistics FieldStatistics `json:"field_statistics"`
- Terms map[string]TermsInfo `json:"terms"`
- }
-
- // TermvectorsResponse is the response of TermvectorsService.Do.
- type TermvectorsResponse struct {
- Index string `json:"_index"`
- Type string `json:"_type"`
- Id string `json:"_id,omitempty"`
- Version int `json:"_version"`
- Found bool `json:"found"`
- Took int64 `json:"took"`
- TermVectors map[string]TermVectorsFieldInfo `json:"term_vectors"`
- }
|