* Improve issue indexer * Fix new issue sqlite bug * Different test indexer paths for each db * Add integration indexer paths to make cleantags/v1.3.0-rc1
@@ -53,5 +53,8 @@ coverage.all | |||
/integrations/gitea-integration-mysql | |||
/integrations/gitea-integration-pgsql | |||
/integrations/gitea-integration-sqlite | |||
/integrations/indexers-mysql | |||
/integrations/indexers-pgsql | |||
/integrations/indexers-sqlite | |||
/integrations/mysql.ini | |||
/integrations/pgsql.ini |
@@ -63,7 +63,11 @@ all: build | |||
.PHONY: clean | |||
clean: | |||
$(GO) clean -i ./... | |||
rm -rf $(EXECUTABLE) $(DIST) $(BINDATA) integrations*.test integrations/gitea-integration-pgsql/ integrations/gitea-integration-mysql/ integrations/gitea-integration-sqlite/ integrations/mysql.ini integrations/pgsql.ini | |||
rm -rf $(EXECUTABLE) $(DIST) $(BINDATA) \ | |||
integrations*.test \ | |||
integrations/gitea-integration-pgsql/ integrations/gitea-integration-mysql/ integrations/gitea-integration-sqlite/ \ | |||
integrations/indexers-mysql/ integrations/indexers-pgsql integrations/indexers-sqlite \ | |||
integrations/mysql.ini integrations/pgsql.ini | |||
required-gofmt-version: | |||
@$(GO) version | grep -q '\(1.7\|1.8\)' || { echo "We require go version 1.7 or 1.8 to format code" >&2 && exit 1; } |
@@ -57,7 +57,14 @@ func TestMain(m *testing.M) { | |||
fmt.Printf("Error initializing test database: %v\n", err) | |||
os.Exit(1) | |||
} | |||
os.Exit(m.Run()) | |||
exitCode := m.Run() | |||
if err = os.RemoveAll(setting.Indexer.IssuePath); err != nil { | |||
fmt.Printf("os.RemoveAll: %v\n", err) | |||
os.Exit(1) | |||
} | |||
os.Exit(exitCode) | |||
} | |||
func initIntegrationTest() { |
@@ -18,8 +18,10 @@ import ( | |||
"github.com/stretchr/testify/assert" | |||
) | |||
func getIssuesSelection(htmlDoc *HTMLDoc) *goquery.Selection { | |||
return htmlDoc.doc.Find(".issue.list").Find("li").Find(".title") | |||
func getIssuesSelection(t testing.TB, htmlDoc *HTMLDoc) *goquery.Selection { | |||
issueList := htmlDoc.doc.Find(".issue.list") | |||
assert.EqualValues(t, 1, issueList.Length()) | |||
return issueList.Find("li").Find(".title") | |||
} | |||
func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *models.Issue { | |||
@@ -31,6 +33,18 @@ func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *mo | |||
return models.AssertExistsAndLoadBean(t, &models.Issue{RepoID: repoID, Index: int64(index)}).(*models.Issue) | |||
} | |||
func assertMatch(t testing.TB, issue *models.Issue, keyword string) { | |||
matches := strings.Contains(strings.ToLower(issue.Title), keyword) || | |||
strings.Contains(strings.ToLower(issue.Content), keyword) | |||
for _, comment := range issue.Comments { | |||
matches = matches || strings.Contains( | |||
strings.ToLower(comment.Content), | |||
keyword, | |||
) | |||
} | |||
assert.True(t, matches) | |||
} | |||
func TestNoLoginViewIssues(t *testing.T) { | |||
prepareTestEnv(t) | |||
@@ -38,19 +52,18 @@ func TestNoLoginViewIssues(t *testing.T) { | |||
MakeRequest(t, req, http.StatusOK) | |||
} | |||
func TestNoLoginViewIssuesSortByType(t *testing.T) { | |||
func TestViewIssuesSortByType(t *testing.T) { | |||
prepareTestEnv(t) | |||
user := models.AssertExistsAndLoadBean(t, &models.User{ID: 1}).(*models.User) | |||
repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository) | |||
repo.Owner = models.AssertExistsAndLoadBean(t, &models.User{ID: repo.OwnerID}).(*models.User) | |||
session := loginUser(t, user.Name) | |||
req := NewRequest(t, "GET", repo.RelLink()+"/issues?type=created_by") | |||
resp := session.MakeRequest(t, req, http.StatusOK) | |||
htmlDoc := NewHTMLParser(t, resp.Body) | |||
issuesSelection := getIssuesSelection(htmlDoc) | |||
issuesSelection := getIssuesSelection(t, htmlDoc) | |||
expectedNumIssues := models.GetCount(t, | |||
&models.Issue{RepoID: repo.ID, PosterID: user.ID}, | |||
models.Cond("is_closed=?", false), | |||
@@ -67,6 +80,26 @@ func TestNoLoginViewIssuesSortByType(t *testing.T) { | |||
}) | |||
} | |||
func TestViewIssuesKeyword(t *testing.T) { | |||
prepareTestEnv(t) | |||
repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository) | |||
const keyword = "first" | |||
req := NewRequestf(t, "GET", "%s/issues?q=%s", repo.RelLink(), keyword) | |||
resp := MakeRequest(t, req, http.StatusOK) | |||
htmlDoc := NewHTMLParser(t, resp.Body) | |||
issuesSelection := getIssuesSelection(t, htmlDoc) | |||
assert.EqualValues(t, 1, issuesSelection.Length()) | |||
issuesSelection.Each(func(_ int, selection *goquery.Selection) { | |||
issue := getIssue(t, repo.ID, selection) | |||
assert.False(t, issue.IsClosed) | |||
assert.False(t, issue.IsPull) | |||
assertMatch(t, issue, keyword) | |||
}) | |||
} | |||
func TestNoLoginViewIssue(t *testing.T) { | |||
prepareTestEnv(t) | |||
@@ -10,6 +10,9 @@ PASSWD = {{TEST_MYSQL_PASSWORD}} | |||
SSL_MODE = disable | |||
PATH = data/gitea.db | |||
[indexer] | |||
ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve | |||
[repository] | |||
ROOT = integrations/gitea-integration-mysql/gitea-repositories | |||
@@ -10,6 +10,9 @@ PASSWD = {{TEST_PGSQL_PASSWORD}} | |||
SSL_MODE = disable | |||
PATH = data/gitea.db | |||
[indexer] | |||
ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve | |||
[repository] | |||
ROOT = integrations/gitea-integration-pgsql/gitea-repositories | |||
@@ -5,6 +5,9 @@ RUN_MODE = prod | |||
DB_TYPE = sqlite3 | |||
PATH = :memory: | |||
[indexer] | |||
ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve | |||
[repository] | |||
ROOT = integrations/gitea-integration-sqlite/gitea-repositories | |||
@@ -5,7 +5,7 @@ | |||
poster_id: 1 | |||
assignee_id: 1 | |||
name: issue1 | |||
content: content1 | |||
content: content for the first issue | |||
is_closed: false | |||
is_pull: false | |||
num_comments: 2 | |||
@@ -18,7 +18,7 @@ | |||
index: 2 | |||
poster_id: 1 | |||
name: issue2 | |||
content: content2 | |||
content: content for the second issue | |||
milestone_id: 1 | |||
is_closed: false | |||
is_pull: true | |||
@@ -32,7 +32,7 @@ | |||
index: 3 | |||
poster_id: 1 | |||
name: issue3 | |||
content: content4 | |||
content: content for the third issue | |||
is_closed: false | |||
is_pull: true | |||
created_unix: 946684820 | |||
@@ -44,7 +44,7 @@ | |||
index: 1 | |||
poster_id: 2 | |||
name: issue4 | |||
content: content4 | |||
content: content for the fourth issue | |||
is_closed: true | |||
is_pull: false | |||
@@ -54,7 +54,7 @@ | |||
index: 4 | |||
poster_id: 2 | |||
name: issue5 | |||
content: content5 | |||
content: content for the fifth issue | |||
is_closed: true | |||
is_pull: false | |||
- |
@@ -155,6 +155,17 @@ func (issue *Issue) loadPullRequest(e Engine) (err error) { | |||
return nil | |||
} | |||
func (issue *Issue) loadComments(e Engine) (err error) { | |||
if issue.Comments != nil { | |||
return nil | |||
} | |||
issue.Comments, err = findComments(e, FindCommentsOptions{ | |||
IssueID: issue.ID, | |||
Type: CommentTypeUnknown, | |||
}) | |||
return err | |||
} | |||
func (issue *Issue) loadAttributes(e Engine) (err error) { | |||
if err = issue.loadRepo(e); err != nil { | |||
return | |||
@@ -191,14 +202,8 @@ func (issue *Issue) loadAttributes(e Engine) (err error) { | |||
} | |||
} | |||
if issue.Comments == nil { | |||
issue.Comments, err = findComments(e, FindCommentsOptions{ | |||
IssueID: issue.ID, | |||
Type: CommentTypeUnknown, | |||
}) | |||
if err != nil { | |||
return fmt.Errorf("getCommentsByIssueID [%d]: %v", issue.ID, err) | |||
} | |||
if err = issue.loadComments(e); err != nil { | |||
return | |||
} | |||
return nil | |||
@@ -577,7 +582,7 @@ func updateIssueCols(e Engine, issue *Issue, cols ...string) error { | |||
if _, err := e.Id(issue.ID).Cols(cols...).Update(issue); err != nil { | |||
return err | |||
} | |||
UpdateIssueIndexer(issue) | |||
UpdateIssueIndexer(issue.ID) | |||
return nil | |||
} | |||
@@ -907,8 +912,6 @@ func newIssue(e *xorm.Session, doer *User, opts NewIssueOptions) (err error) { | |||
return err | |||
} | |||
UpdateIssueIndexer(opts.Issue) | |||
if len(opts.Attachments) > 0 { | |||
attachments, err := getAttachmentsByUUIDs(e, opts.Attachments) | |||
if err != nil { | |||
@@ -947,6 +950,8 @@ func NewIssue(repo *Repository, issue *Issue, labelIDs []int64, uuids []string) | |||
return fmt.Errorf("Commit: %v", err) | |||
} | |||
UpdateIssueIndexer(issue.ID) | |||
if err = NotifyWatchers(&Action{ | |||
ActUserID: issue.Poster.ID, | |||
ActUser: issue.Poster, | |||
@@ -1448,7 +1453,7 @@ func updateIssue(e Engine, issue *Issue) error { | |||
if err != nil { | |||
return err | |||
} | |||
UpdateIssueIndexer(issue) | |||
UpdateIssueIndexer(issue.ID) | |||
return nil | |||
} | |||
@@ -520,7 +520,14 @@ func CreateComment(opts *CreateCommentOptions) (comment *Comment, err error) { | |||
return nil, err | |||
} | |||
return comment, sess.Commit() | |||
if err = sess.Commit(); err != nil { | |||
return nil, err | |||
} | |||
if opts.Type == CommentTypeComment { | |||
UpdateIssueIndexer(opts.Issue.ID) | |||
} | |||
return comment, nil | |||
} | |||
// CreateIssueComment creates a plain issue comment. | |||
@@ -645,8 +652,12 @@ func GetCommentsByRepoIDSince(repoID, since int64) ([]*Comment, error) { | |||
// UpdateComment updates information of comment. | |||
func UpdateComment(c *Comment) error { | |||
_, err := x.Id(c.ID).AllCols().Update(c) | |||
return err | |||
if _, err := x.Id(c.ID).AllCols().Update(c); err != nil { | |||
return err | |||
} else if c.Type == CommentTypeComment { | |||
UpdateIssueIndexer(c.IssueID) | |||
} | |||
return nil | |||
} | |||
// DeleteComment deletes the comment | |||
@@ -672,5 +683,10 @@ func DeleteComment(comment *Comment) error { | |||
return err | |||
} | |||
return sess.Commit() | |||
if err := sess.Commit(); err != nil { | |||
return err | |||
} else if comment.Type == CommentTypeComment { | |||
UpdateIssueIndexer(comment.IssueID) | |||
} | |||
return nil | |||
} |
@@ -6,112 +6,21 @@ package models | |||
import ( | |||
"fmt" | |||
"os" | |||
"strconv" | |||
"strings" | |||
"code.gitea.io/gitea/modules/indexer" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/modules/util" | |||
"github.com/blevesearch/bleve" | |||
"github.com/blevesearch/bleve/analysis/analyzer/simple" | |||
"github.com/blevesearch/bleve/search/query" | |||
) | |||
// issueIndexerUpdateQueue queue of issues that need to be updated in the issues | |||
// indexer | |||
var issueIndexerUpdateQueue chan *Issue | |||
// issueIndexer (thread-safe) index for searching issues | |||
var issueIndexer bleve.Index | |||
// issueIndexerData data stored in the issue indexer | |||
type issueIndexerData struct { | |||
ID int64 | |||
RepoID int64 | |||
Title string | |||
Content string | |||
} | |||
// numericQuery an numeric-equality query for the given value and field | |||
func numericQuery(value int64, field string) *query.NumericRangeQuery { | |||
f := float64(value) | |||
tru := true | |||
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) | |||
q.SetField(field) | |||
return q | |||
} | |||
// SearchIssuesByKeyword searches for issues by given conditions. | |||
// Returns the matching issue IDs | |||
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { | |||
terms := strings.Fields(strings.ToLower(keyword)) | |||
indexerQuery := bleve.NewConjunctionQuery( | |||
numericQuery(repoID, "RepoID"), | |||
bleve.NewDisjunctionQuery( | |||
bleve.NewPhraseQuery(terms, "Title"), | |||
bleve.NewPhraseQuery(terms, "Content"), | |||
)) | |||
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) | |||
search.Fields = []string{"ID"} | |||
result, err := issueIndexer.Search(search) | |||
if err != nil { | |||
return nil, err | |||
} | |||
issueIDs := make([]int64, len(result.Hits)) | |||
for i, hit := range result.Hits { | |||
issueIDs[i] = int64(hit.Fields["ID"].(float64)) | |||
} | |||
return issueIDs, nil | |||
} | |||
// issueIndexerUpdateQueue queue of issue ids to be updated | |||
var issueIndexerUpdateQueue chan int64 | |||
// InitIssueIndexer initialize issue indexer | |||
func InitIssueIndexer() { | |||
_, err := os.Stat(setting.Indexer.IssuePath) | |||
if err != nil { | |||
if os.IsNotExist(err) { | |||
if err = createIssueIndexer(); err != nil { | |||
log.Fatal(4, "CreateIssuesIndexer: %v", err) | |||
} | |||
if err = populateIssueIndexer(); err != nil { | |||
log.Fatal(4, "PopulateIssuesIndex: %v", err) | |||
} | |||
} else { | |||
log.Fatal(4, "InitIssuesIndexer: %v", err) | |||
} | |||
} else { | |||
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) | |||
if err != nil { | |||
log.Fatal(4, "InitIssuesIndexer, open index: %v", err) | |||
} | |||
} | |||
issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength) | |||
indexer.InitIssueIndexer(populateIssueIndexer) | |||
issueIndexerUpdateQueue = make(chan int64, setting.Indexer.UpdateQueueLength) | |||
go processIssueIndexerUpdateQueue() | |||
// TODO close issueIndexer when Gitea closes | |||
} | |||
// createIssueIndexer create an issue indexer if one does not already exist | |||
func createIssueIndexer() error { | |||
mapping := bleve.NewIndexMapping() | |||
docMapping := bleve.NewDocumentMapping() | |||
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping()) | |||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) | |||
textFieldMapping := bleve.NewTextFieldMapping() | |||
textFieldMapping.Analyzer = simple.Name | |||
docMapping.AddFieldMappingsAt("Title", textFieldMapping) | |||
docMapping.AddFieldMappingsAt("Content", textFieldMapping) | |||
mapping.AddDocumentMapping("issues", docMapping) | |||
var err error | |||
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) | |||
return err | |||
} | |||
// populateIssueIndexer populate the issue indexer with issue data | |||
@@ -127,57 +36,64 @@ func populateIssueIndexer() error { | |||
if len(repos) == 0 { | |||
return nil | |||
} | |||
batch := issueIndexer.NewBatch() | |||
for _, repo := range repos { | |||
issues, err := Issues(&IssuesOptions{ | |||
RepoID: repo.ID, | |||
IsClosed: util.OptionalBoolNone, | |||
IsPull: util.OptionalBoolNone, | |||
}) | |||
if err != nil { | |||
return fmt.Errorf("Issues: %v", err) | |||
updates := make([]indexer.IssueIndexerUpdate, len(issues)) | |||
for i, issue := range issues { | |||
updates[i] = issue.update() | |||
} | |||
for _, issue := range issues { | |||
err = batch.Index(issue.indexUID(), issue.issueData()) | |||
if err != nil { | |||
return fmt.Errorf("batch.Index: %v", err) | |||
} | |||
if err = indexer.BatchUpdateIssues(updates...); err != nil { | |||
return fmt.Errorf("BatchUpdate: %v", err) | |||
} | |||
} | |||
if err = issueIndexer.Batch(batch); err != nil { | |||
return fmt.Errorf("index.Batch: %v", err) | |||
} | |||
} | |||
} | |||
func processIssueIndexerUpdateQueue() { | |||
for { | |||
select { | |||
case issue := <-issueIndexerUpdateQueue: | |||
if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil { | |||
case issueID := <-issueIndexerUpdateQueue: | |||
issue, err := GetIssueByID(issueID) | |||
if err != nil { | |||
log.Error(4, "issuesIndexer.Index: %v", err) | |||
continue | |||
} | |||
if err = indexer.UpdateIssue(issue.update()); err != nil { | |||
log.Error(4, "issuesIndexer.Index: %v", err) | |||
} | |||
} | |||
} | |||
} | |||
// indexUID a unique identifier for an issue used in full-text indices | |||
func (issue *Issue) indexUID() string { | |||
return strconv.FormatInt(issue.ID, 36) | |||
} | |||
func (issue *Issue) issueData() *issueIndexerData { | |||
return &issueIndexerData{ | |||
ID: issue.ID, | |||
RepoID: issue.RepoID, | |||
Title: issue.Title, | |||
Content: issue.Content, | |||
func (issue *Issue) update() indexer.IssueIndexerUpdate { | |||
comments := make([]string, 0, 5) | |||
for _, comment := range issue.Comments { | |||
if comment.Type == CommentTypeComment { | |||
comments = append(comments, comment.Content) | |||
} | |||
} | |||
return indexer.IssueIndexerUpdate{ | |||
IssueID: issue.ID, | |||
Data: &indexer.IssueIndexerData{ | |||
RepoID: issue.RepoID, | |||
Title: issue.Title, | |||
Content: issue.Content, | |||
Comments: comments, | |||
}, | |||
} | |||
} | |||
// UpdateIssueIndexer add/update an issue to the issue indexer | |||
func UpdateIssueIndexer(issue *Issue) { | |||
go func() { | |||
issueIndexerUpdateQueue <- issue | |||
}() | |||
func UpdateIssueIndexer(issueID int64) { | |||
select { | |||
case issueIndexerUpdateQueue <- issueID: | |||
default: | |||
go func() { | |||
issueIndexerUpdateQueue <- issueID | |||
}() | |||
} | |||
} |
@@ -640,6 +640,8 @@ func NewPullRequest(repo *Repository, pull *Issue, labelIDs []int64, uuids []str | |||
return fmt.Errorf("Commit: %v", err) | |||
} | |||
UpdateIssueIndexer(pull.ID) | |||
if err = NotifyWatchers(&Action{ | |||
ActUserID: pull.Poster.ID, | |||
ActUser: pull.Poster, |
@@ -5,10 +5,39 @@ | |||
package indexer | |||
import ( | |||
"code.gitea.io/gitea/models" | |||
"fmt" | |||
"strconv" | |||
"github.com/blevesearch/bleve" | |||
"github.com/blevesearch/bleve/search/query" | |||
) | |||
// NewContext start indexer service | |||
func NewContext() { | |||
models.InitIssueIndexer() | |||
// indexerID a bleve-compatible unique identifier for an integer id | |||
func indexerID(id int64) string { | |||
return strconv.FormatInt(id, 36) | |||
} | |||
// idOfIndexerID the integer id associated with an indexer id | |||
func idOfIndexerID(indexerID string) (int64, error) { | |||
id, err := strconv.ParseInt(indexerID, 36, 64) | |||
if err != nil { | |||
return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, err) | |||
} | |||
return id, nil | |||
} | |||
// numericEqualityQuery a numeric equality query for the given value and field | |||
func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery { | |||
f := float64(value) | |||
tru := true | |||
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) | |||
q.SetField(field) | |||
return q | |||
} | |||
func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery { | |||
q := bleve.NewMatchPhraseQuery(matchPhrase) | |||
q.FieldVal = field | |||
q.Analyzer = analyzer | |||
return q | |||
} |
@@ -0,0 +1,143 @@ | |||
// Copyright 2017 The Gitea Authors. All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package indexer | |||
import ( | |||
"os" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/setting" | |||
"github.com/blevesearch/bleve" | |||
"github.com/blevesearch/bleve/analysis/analyzer/custom" | |||
"github.com/blevesearch/bleve/analysis/token/lowercase" | |||
"github.com/blevesearch/bleve/analysis/token/unicodenorm" | |||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" | |||
) | |||
// issueIndexer (thread-safe) index for searching issues | |||
var issueIndexer bleve.Index | |||
// IssueIndexerData data stored in the issue indexer | |||
type IssueIndexerData struct { | |||
RepoID int64 | |||
Title string | |||
Content string | |||
Comments []string | |||
} | |||
// IssueIndexerUpdate an update to the issue indexer | |||
type IssueIndexerUpdate struct { | |||
IssueID int64 | |||
Data *IssueIndexerData | |||
} | |||
const issueIndexerAnalyzer = "issueIndexer" | |||
// InitIssueIndexer initialize issue indexer | |||
func InitIssueIndexer(populateIndexer func() error) { | |||
_, err := os.Stat(setting.Indexer.IssuePath) | |||
if err != nil { | |||
if os.IsNotExist(err) { | |||
if err = createIssueIndexer(); err != nil { | |||
log.Fatal(4, "CreateIssuesIndexer: %v", err) | |||
} | |||
if err = populateIndexer(); err != nil { | |||
log.Fatal(4, "PopulateIssuesIndex: %v", err) | |||
} | |||
} else { | |||
log.Fatal(4, "InitIssuesIndexer: %v", err) | |||
} | |||
} else { | |||
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) | |||
if err != nil { | |||
log.Error(4, "Unable to open issues indexer (%s)."+ | |||
" If the error is due to incompatible versions, try deleting the indexer files;"+ | |||
" gitea will recreate them with the appropriate version the next time it runs."+ | |||
" Deleting the indexer files will not result in loss of data.", | |||
setting.Indexer.IssuePath) | |||
log.Fatal(4, "InitIssuesIndexer, open index: %v", err) | |||
} | |||
} | |||
} | |||
// createIssueIndexer create an issue indexer if one does not already exist | |||
func createIssueIndexer() error { | |||
mapping := bleve.NewIndexMapping() | |||
docMapping := bleve.NewDocumentMapping() | |||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) | |||
textFieldMapping := bleve.NewTextFieldMapping() | |||
docMapping.AddFieldMappingsAt("Title", textFieldMapping) | |||
docMapping.AddFieldMappingsAt("Content", textFieldMapping) | |||
docMapping.AddFieldMappingsAt("Comments", textFieldMapping) | |||
const unicodeNormNFC = "unicodeNormNFC" | |||
if err := mapping.AddCustomTokenFilter(unicodeNormNFC, map[string]interface{}{ | |||
"type": unicodenorm.Name, | |||
"form": unicodenorm.NFC, | |||
}); err != nil { | |||
return err | |||
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ | |||
"type": custom.Name, | |||
"char_filters": []string{}, | |||
"tokenizer": unicode.Name, | |||
"token_filters": []string{unicodeNormNFC, lowercase.Name}, | |||
}); err != nil { | |||
return err | |||
} | |||
mapping.DefaultAnalyzer = issueIndexerAnalyzer | |||
mapping.AddDocumentMapping("issues", docMapping) | |||
var err error | |||
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) | |||
return err | |||
} | |||
// UpdateIssue update the issue indexer | |||
func UpdateIssue(update IssueIndexerUpdate) error { | |||
return issueIndexer.Index(indexerID(update.IssueID), update.Data) | |||
} | |||
// BatchUpdateIssues perform a batch update of the issue indexer | |||
func BatchUpdateIssues(updates ...IssueIndexerUpdate) error { | |||
batch := issueIndexer.NewBatch() | |||
for _, update := range updates { | |||
err := batch.Index(indexerID(update.IssueID), update.Data) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
return issueIndexer.Batch(batch) | |||
} | |||
// SearchIssuesByKeyword searches for issues by given conditions. | |||
// Returns the matching issue IDs | |||
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { | |||
indexerQuery := bleve.NewConjunctionQuery( | |||
numericEqualityQuery(repoID, "RepoID"), | |||
bleve.NewDisjunctionQuery( | |||
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer), | |||
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer), | |||
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer), | |||
)) | |||
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) | |||
result, err := issueIndexer.Search(search) | |||
if err != nil { | |||
return nil, err | |||
} | |||
issueIDs := make([]int64, len(result.Hits)) | |||
for i, hit := range result.Hits { | |||
issueIDs[i], err = idOfIndexerID(hit.ID) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
return issueIDs, nil | |||
} |
@@ -13,7 +13,6 @@ import ( | |||
"code.gitea.io/gitea/models/migrations" | |||
"code.gitea.io/gitea/modules/cron" | |||
"code.gitea.io/gitea/modules/highlight" | |||
"code.gitea.io/gitea/modules/indexer" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/mailer" | |||
"code.gitea.io/gitea/modules/markup" | |||
@@ -63,7 +62,7 @@ func GlobalInit() { | |||
// Booting long running goroutines. | |||
cron.NewContext() | |||
indexer.NewContext() | |||
models.InitIssueIndexer() | |||
models.InitSyncMirrors() | |||
models.InitDeliverHooks() | |||
models.InitTestPullRequests() |
@@ -22,6 +22,7 @@ import ( | |||
"code.gitea.io/gitea/modules/auth" | |||
"code.gitea.io/gitea/modules/base" | |||
"code.gitea.io/gitea/modules/context" | |||
"code.gitea.io/gitea/modules/indexer" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/markdown" | |||
"code.gitea.io/gitea/modules/notification" | |||
@@ -142,7 +143,7 @@ func Issues(ctx *context.Context) { | |||
var issueIDs []int64 | |||
var err error | |||
if len(keyword) > 0 { | |||
issueIDs, err = models.SearchIssuesByKeyword(repo.ID, keyword) | |||
issueIDs, err = indexer.SearchIssuesByKeyword(repo.ID, keyword) | |||
if len(issueIDs) == 0 { | |||
forceEmpty = true | |||
} |
@@ -4,6 +4,7 @@ | |||
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | |||
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) | |||
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) | |||
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) | |||
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) | |||
@@ -33,29 +34,33 @@ Discuss usage and development of bleve in the [google group](https://groups.goog | |||
## Indexing | |||
message := struct{ | |||
Id string | |||
From string | |||
Body string | |||
}{ | |||
Id: "example", | |||
From: "marty.schoch@gmail.com", | |||
Body: "bleve indexing is easy", | |||
} | |||
mapping := bleve.NewIndexMapping() | |||
index, err := bleve.New("example.bleve", mapping) | |||
if err != nil { | |||
panic(err) | |||
} | |||
index.Index(message.Id, message) | |||
```go | |||
message := struct{ | |||
Id string | |||
From string | |||
Body string | |||
}{ | |||
Id: "example", | |||
From: "marty.schoch@gmail.com", | |||
Body: "bleve indexing is easy", | |||
} | |||
mapping := bleve.NewIndexMapping() | |||
index, err := bleve.New("example.bleve", mapping) | |||
if err != nil { | |||
panic(err) | |||
} | |||
index.Index(message.Id, message) | |||
``` | |||
## Querying | |||
index, _ := bleve.Open("example.bleve") | |||
query := bleve.NewQueryStringQuery("bleve") | |||
searchRequest := bleve.NewSearchRequest(query) | |||
searchResult, _ := index.Search(searchRequest) | |||
```go | |||
index, _ := bleve.Open("example.bleve") | |||
query := bleve.NewQueryStringQuery("bleve") | |||
searchRequest := bleve.NewSearchRequest(query) | |||
searchResult, _ := index.Search(searchRequest) | |||
``` | |||
## License | |||
@@ -0,0 +1,145 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package custom | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const Name = "custom" | |||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { | |||
var err error | |||
var charFilters []analysis.CharFilter | |||
charFiltersValue, ok := config["char_filters"] | |||
if ok { | |||
switch charFiltersValue := charFiltersValue.(type) { | |||
case []string: | |||
charFilters, err = getCharFilters(charFiltersValue, cache) | |||
if err != nil { | |||
return nil, err | |||
} | |||
case []interface{}: | |||
charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter") | |||
if err != nil { | |||
return nil, err | |||
} | |||
charFilters, err = getCharFilters(charFiltersNames, cache) | |||
if err != nil { | |||
return nil, err | |||
} | |||
default: | |||
return nil, fmt.Errorf("unsupported type for char_filters, must be slice") | |||
} | |||
} | |||
var tokenizerName string | |||
tokenizerValue, ok := config["tokenizer"] | |||
if ok { | |||
tokenizerName, ok = tokenizerValue.(string) | |||
if !ok { | |||
return nil, fmt.Errorf("must specify tokenizer as string") | |||
} | |||
} else { | |||
return nil, fmt.Errorf("must specify tokenizer") | |||
} | |||
tokenizer, err := cache.TokenizerNamed(tokenizerName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var tokenFilters []analysis.TokenFilter | |||
tokenFiltersValue, ok := config["token_filters"] | |||
if ok { | |||
switch tokenFiltersValue := tokenFiltersValue.(type) { | |||
case []string: | |||
tokenFilters, err = getTokenFilters(tokenFiltersValue, cache) | |||
if err != nil { | |||
return nil, err | |||
} | |||
case []interface{}: | |||
tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter") | |||
if err != nil { | |||
return nil, err | |||
} | |||
tokenFilters, err = getTokenFilters(tokenFiltersNames, cache) | |||
if err != nil { | |||
return nil, err | |||
} | |||
default: | |||
return nil, fmt.Errorf("unsupported type for token_filters, must be slice") | |||
} | |||
} | |||
rv := analysis.Analyzer{ | |||
Tokenizer: tokenizer, | |||
} | |||
if charFilters != nil { | |||
rv.CharFilters = charFilters | |||
} | |||
if tokenFilters != nil { | |||
rv.TokenFilters = tokenFilters | |||
} | |||
return &rv, nil | |||
} | |||
func init() { | |||
registry.RegisterAnalyzer(Name, AnalyzerConstructor) | |||
} | |||
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) { | |||
charFilters := make([]analysis.CharFilter, len(charFilterNames)) | |||
for i, charFilterName := range charFilterNames { | |||
charFilter, err := cache.CharFilterNamed(charFilterName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
charFilters[i] = charFilter | |||
} | |||
return charFilters, nil | |||
} | |||
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) { | |||
tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames)) | |||
for i, tokenFilterName := range tokenFilterNames { | |||
tokenFilter, err := cache.TokenFilterNamed(tokenFilterName) | |||
if err != nil { | |||
return nil, err | |||
} | |||
tokenFilters[i] = tokenFilter | |||
} | |||
return tokenFilters, nil | |||
} | |||
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) { | |||
stringSlice := make([]string, len(interfaceSlice)) | |||
for i, interfaceObj := range interfaceSlice { | |||
stringObj, ok := interfaceObj.(string) | |||
if ok { | |||
stringSlice[i] = stringObj | |||
} else { | |||
return nil, fmt.Errorf(objType + " name must be a string") | |||
} | |||
} | |||
return stringSlice, nil | |||
} |
@@ -1,46 +0,0 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package simple | |||
import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/analysis/token/lowercase" | |||
"github.com/blevesearch/bleve/analysis/tokenizer/letter" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const Name = "simple" | |||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { | |||
tokenizer, err := cache.TokenizerNamed(letter.Name) | |||
if err != nil { | |||
return nil, err | |||
} | |||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv := analysis.Analyzer{ | |||
Tokenizer: tokenizer, | |||
TokenFilters: []analysis.TokenFilter{ | |||
toLowerFilter, | |||
}, | |||
} | |||
return &rv, nil | |||
} | |||
// register the "simple" analyzer with the global bleve registry at
// package load time
func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@@ -0,0 +1,79 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package unicodenorm | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/registry" | |||
"golang.org/x/text/unicode/norm" | |||
) | |||
// Name is the registry name of this token filter.
const Name = "normalize_unicode"

// Supported Unicode normalization form names, as accepted in the
// filter's "form" config value.
const NFC = "nfc"
const NFD = "nfd"
const NFKC = "nfkc"
const NFKD = "nfkd"

// forms maps the config-level form names onto the golang.org/x/text
// normalization forms.
var forms = map[string]norm.Form{
	NFC:  norm.NFC,
	NFD:  norm.NFD,
	NFKC: norm.NFKC,
	NFKD: norm.NFKD,
}
// UnicodeNormalizeFilter applies a fixed Unicode normalization form to
// each token's term bytes.
type UnicodeNormalizeFilter struct {
	form norm.Form // the normalization form applied by Filter
}
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) { | |||
form, ok := forms[formName] | |||
if !ok { | |||
return nil, fmt.Errorf("no form named %s", formName) | |||
} | |||
return &UnicodeNormalizeFilter{ | |||
form: form, | |||
}, nil | |||
} | |||
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter { | |||
filter, err := NewUnicodeNormalizeFilter(formName) | |||
if err != nil { | |||
panic(err) | |||
} | |||
return filter | |||
} | |||
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream { | |||
for _, token := range input { | |||
token.Term = s.form.Bytes(token.Term) | |||
} | |||
return input | |||
} | |||
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { | |||
formVal, ok := config["form"].(string) | |||
if !ok { | |||
return nil, fmt.Errorf("must specify form") | |||
} | |||
form := formVal | |||
return NewUnicodeNormalizeFilter(form) | |||
} | |||
// register the unicode normalization token filter with the global
// bleve registry at package load time
func init() {
	registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
}
@@ -1,76 +0,0 @@ | |||
// Copyright (c) 2016 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package character | |||
import ( | |||
"unicode/utf8" | |||
"github.com/blevesearch/bleve/analysis" | |||
) | |||
// IsTokenRune reports whether a rune belongs inside a token.
type IsTokenRune func(r rune) bool

// CharacterTokenizer splits input into tokens made of consecutive runes
// accepted by its predicate.
type CharacterTokenizer struct {
	isTokenRun IsTokenRune // predicate deciding rune membership in a token
}
// NewCharacterTokenizer builds a tokenizer that groups consecutive
// runes accepted by f into tokens.
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
	return &CharacterTokenizer{
		isTokenRun: f,
	}
}
// Tokenize scans input as UTF-8 and emits one token for every maximal
// run of runes accepted by the isTokenRun predicate. Start/End are byte
// offsets into input, Position is 1-based, and every token is typed
// AlphaNumeric. Scanning stops at the first point where DecodeRune
// yields RuneError — the end of input, but also any invalid UTF-8
// sequence encountered mid-stream.
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0, 1024)
	offset := 0 // byte offset of the rune being examined
	start := 0  // byte offset where the current token begins
	end := 0    // byte offset one past the last accepted rune
	count := 0  // tokens emitted so far
	for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) {
		isToken := c.isTokenRun(currRune)
		if isToken {
			// extend the current token over this rune
			end = offset + size
		} else {
			if end-start > 0 {
				// build token
				rv = append(rv, &analysis.Token{
					Term:     input[start:end],
					Start:    start,
					End:      end,
					Position: count + 1,
					Type:     analysis.AlphaNumeric,
				})
				count++
			}
			// restart token tracking just past the separator rune
			start = offset + size
			end = start
		}
		offset += size
	}
	// if we ended in the middle of a token, finish it
	if end-start > 0 {
		// build token
		rv = append(rv, &analysis.Token{
			Term:     input[start:end],
			Start:    start,
			End:      end,
			Position: count + 1,
			Type:     analysis.AlphaNumeric,
		})
	}
	return rv
}
@@ -1,33 +0,0 @@ | |||
// Copyright (c) 2016 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package letter | |||
import ( | |||
"unicode" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/analysis/tokenizer/character" | |||
"github.com/blevesearch/bleve/registry" | |||
) | |||
const Name = "letter" | |||
// TokenizerConstructor builds a character tokenizer that accepts
// exactly the Unicode letter runes.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	return character.NewCharacterTokenizer(unicode.IsLetter), nil
}
// register the letter tokenizer with the global bleve registry at
// package load time
func init() {
	registry.RegisterTokenizer(Name, TokenizerConstructor)
}
@@ -1,23 +0,0 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
// +build appengine appenginevm | |||
package bleve | |||
// in the appengine environment we cannot support disk based indexes
// so we do no extra configuration in this method
func initDisk() {
}
@@ -0,0 +1,137 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package document | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/geo" | |||
"github.com/blevesearch/bleve/numeric" | |||
) | |||
var GeoPrecisionStep uint = 9 | |||
// GeoPointField indexes a single lon/lat point, stored as a
// prefix-coded morton hash of the coordinates.
type GeoPointField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	value             numeric.PrefixCoded // morton-hashed point, prefix coded
	numPlainTextBytes uint64
}
// Name returns the field's name.
func (n *GeoPointField) Name() string {
	return n.name
}

// ArrayPositions returns the positions of this field inside any
// enclosing arrays.
func (n *GeoPointField) ArrayPositions() []uint64 {
	return n.arrayPositions
}

// Options returns the indexing options in effect for this field.
func (n *GeoPointField) Options() IndexingOptions {
	return n.options
}
// Analyze produces the token stream for this field: the full-precision
// prefix-coded value first, then (when the value decodes as an int64)
// one additional prefix-coded term per precision level, increasing the
// shift by GeoPrecisionStep bits each time. Returns the field length
// (token count) and the token frequencies.
func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.Numeric,
	})
	// decode the morton-hashed point back to an int64 so the coarser
	// precision terms can be derived from it
	original, err := n.value.Int64()
	if err == nil {
		shift := GeoPrecisionStep
		for shift < 64 {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			token := analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			}
			tokens = append(tokens, &token)
			shift += GeoPrecisionStep
		}
	}
	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}
// Value returns the raw prefix-coded bytes backing this field.
func (n *GeoPointField) Value() []byte {
	return n.value
}

// Lon decodes the longitude back out of the morton-hashed value.
func (n *GeoPointField) Lon() (float64, error) {
	i64, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	return geo.MortonUnhashLon(uint64(i64)), nil
}

// Lat decodes the latitude back out of the morton-hashed value.
func (n *GeoPointField) Lat() (float64, error) {
	i64, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	return geo.MortonUnhashLat(uint64(i64)), nil
}

// GoString implements fmt.GoStringer for %#v formatting.
func (n *GeoPointField) GoString() string {
	return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NumPlainTextBytes reports the plain-text size attributed to this
// field.
func (n *GeoPointField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}
// NewGeoPointFieldFromBytes builds a GeoPointField directly from
// already prefix-coded bytes, using the default numeric indexing
// options.
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
	return &GeoPointField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultNumericIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewGeoPointField builds a GeoPointField for the given lon/lat pair
// with the default numeric indexing options.
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
	return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
}

// NewGeoPointFieldWithIndexingOptions morton-hashes the lon/lat pair
// and stores it prefix coded at full precision (shift 0).
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField {
	mhash := geo.MortonHash(lon, lat)
	prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0)
	return &GeoPointField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          prefixCoded,
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
}
@@ -0,0 +1,9 @@ | |||
# geo support in bleve | |||
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html). | |||
## Notes | |||
- All of the APIs will use float64 for lon/lat values. | |||
- When describing a point in function arguments or return values, we always use the order lon, lat. | |||
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly. |
@@ -0,0 +1,170 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package geo | |||
import ( | |||
"fmt" | |||
"math" | |||
"github.com/blevesearch/bleve/numeric" | |||
) | |||
// GeoBits is the number of bits used for a single geo point
// Currently this is 32bits for lon and 32bits for lat
var GeoBits uint = 32

// Valid coordinate ranges in degrees, and their radian equivalents.
var minLon = -180.0
var minLat = -90.0
var maxLon = 180.0
var maxLat = 90.0
var minLonRad = minLon * degreesToRadian
var minLatRad = minLat * degreesToRadian
var maxLonRad = maxLon * degreesToRadian
var maxLatRad = maxLat * degreesToRadian

// geoTolerance is the slack allowed by compareGeo when comparing
// coordinate values.
var geoTolerance = 1E-6

// Scale factors mapping the degree ranges onto the full GeoBits-wide
// integer range.
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
// MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat. The scaled lon occupies the even
// bits and the scaled lat the odd bits of the result.
func MortonHash(lon, lat float64) uint64 {
	return numeric.Interleave(scaleLon(lon), scaleLat(lat))
}
func scaleLon(lon float64) uint64 { | |||
rv := uint64((lon - minLon) * lonScale) | |||
return rv | |||
} | |||
func scaleLat(lat float64) uint64 { | |||
rv := uint64((lat - minLat) * latScale) | |||
return rv | |||
} | |||
// MortonUnhashLon extracts the longitude value from the provided morton hash.
func MortonUnhashLon(hash uint64) float64 {
	return unscaleLon(numeric.Deinterleave(hash))
}

// MortonUnhashLat extracts the latitude value from the provided morton hash.
// The hash is shifted right by one first because the lat bits occupy
// the odd positions.
func MortonUnhashLat(hash uint64) float64 {
	return unscaleLat(numeric.Deinterleave(hash >> 1))
}

// unscaleLon reverses scaleLon, mapping the integer back to degrees.
func unscaleLon(lon uint64) float64 {
	return (float64(lon) / lonScale) + minLon
}

// unscaleLat reverses scaleLat, mapping the integer back to degrees.
func unscaleLat(lat uint64) float64 {
	return (float64(lat) / latScale) + minLat
}
// compareGeo will compare two float values and see if they are the same
// taking into consideration a known geo tolerance. It returns 0 when
// the values are within geoTolerance of each other, otherwise the
// signed difference a-b.
func compareGeo(a, b float64) float64 {
	compare := a - b
	if math.Abs(compare) <= geoTolerance {
		return 0
	}
	return compare
}
// RectIntersects checks whether rectangles a and b intersect. Boxes
// that merely touch along an edge are considered intersecting.
func RectIntersects(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	disjointX := aMaxX < bMinX || aMinX > bMaxX
	disjointY := aMaxY < bMinY || aMinY > bMaxY
	return !(disjointX || disjointY)
}
// RectWithin checks whether box a is within box b. A box is considered
// within itself (boundaries may coincide).
func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	outside := aMinX < bMinX || aMinY < bMinY || aMaxX > bMaxX || aMaxY > bMaxY
	return !outside
}
// BoundingBoxContains checks whether the lon/lat point is within the box,
// using compareGeo so points within geoTolerance of an edge count as
// inside.
func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool {
	return compareGeo(lon, minLon) >= 0 && compareGeo(lon, maxLon) <= 0 &&
		compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0
}
// Conversion factors between degrees and radians.
const degreesToRadian = math.Pi / 180
const radiansToDegrees = 180 / math.Pi

// DegreesToRadians converts an angle in degrees to radians
func DegreesToRadians(d float64) float64 {
	return d * degreesToRadian
}

// RadiansToDegrees converts an angle in radians to degrees
func RadiansToDegrees(r float64) float64 {
	return r * radiansToDegrees
}
// earthMeanRadiusMeters is the mean earth radius used to convert a
// distance in meters into an angular distance in radians.
var earthMeanRadiusMeters = 6371008.7714

// RectFromPointDistance computes a bounding box around the point
// (lon, lat) containing everything within dist meters of it. The four
// values returned are, in order: min lon, max lat, max lon, min lat
// (i.e. top-left then bottom-right corner), all in degrees. When the
// circle crosses the dateline the returned lon bounds wrap past
// +/-180. An error is returned for out-of-range or NaN inputs.
func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, float64, error) {
	err := checkLongitude(lon)
	if err != nil {
		return 0, 0, 0, 0, err
	}
	err = checkLatitude(lat)
	if err != nil {
		return 0, 0, 0, 0, err
	}
	radLon := DegreesToRadians(lon)
	radLat := DegreesToRadians(lat)
	// the 7e-2 term adds a small safety margin to the angular radius
	radDistance := (dist + 7e-2) / earthMeanRadiusMeters
	minLatL := radLat - radDistance
	maxLatL := radLat + radDistance
	var minLonL, maxLonL float64
	if minLatL > minLatRad && maxLatL < maxLatRad {
		// neither pole is inside the circle; compute the lon extent
		// using the sloppy sin/cos/asin helpers
		deltaLon := asin(sin(radDistance) / cos(radLat))
		minLonL = radLon - deltaLon
		if minLonL < minLonRad {
			minLonL += 2 * math.Pi
		}
		maxLonL = radLon + deltaLon
		if maxLonL > maxLonRad {
			maxLonL -= 2 * math.Pi
		}
	} else {
		// pole is inside distance
		minLatL = math.Max(minLatL, minLatRad)
		maxLatL = math.Min(maxLatL, maxLatRad)
		minLonL = minLonRad
		maxLonL = maxLonRad
	}
	return RadiansToDegrees(minLonL),
		RadiansToDegrees(maxLatL),
		RadiansToDegrees(maxLonL),
		RadiansToDegrees(minLatL),
		nil
}
func checkLatitude(latitude float64) error { | |||
if math.IsNaN(latitude) || latitude < minLat || latitude > maxLat { | |||
return fmt.Errorf("invalid latitude %f; must be between %f and %f", latitude, minLat, maxLat) | |||
} | |||
return nil | |||
} | |||
func checkLongitude(longitude float64) error { | |||
if math.IsNaN(longitude) || longitude < minLon || longitude > maxLon { | |||
return fmt.Errorf("invalid longitude %f; must be between %f and %f", longitude, minLon, maxLon) | |||
} | |||
return nil | |||
} |
@@ -0,0 +1,98 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package geo | |||
import ( | |||
"fmt" | |||
"math" | |||
"strconv" | |||
"strings" | |||
) | |||
// distanceUnit pairs a conversion factor to meters with the suffix
// strings that select it.
type distanceUnit struct {
	conv     float64  // multiplier converting this unit to meters
	suffixes []string // accepted spellings, short form first
}

// Supported units. Their order in distanceUnits matters: ParseDistance
// takes the first matching suffix, so "nm"/"mm"/"cm" must precede the
// bare "m" of meters.
var inch = distanceUnit{0.0254, []string{"in", "inch"}}
var yard = distanceUnit{0.9144, []string{"yd", "yards"}}
var feet = distanceUnit{0.3048, []string{"ft", "feet"}}
var kilom = distanceUnit{1000, []string{"km", "kilometers"}}
var nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}}
var millim = distanceUnit{0.001, []string{"mm", "millimeters"}}
var centim = distanceUnit{0.01, []string{"cm", "centimeters"}}
var miles = distanceUnit{1609.344, []string{"mi", "miles"}}
var meters = distanceUnit{1, []string{"m", "meters"}}

// distanceUnits lists every unit in suffix-matching order. The original
// text contained the mis-encoded token "¢im" where "&centim" belongs
// (an HTML-entity corruption of the ampersand); restored here.
var distanceUnits = []*distanceUnit{
	&inch, &yard, &feet, &kilom, &nauticalm, &millim, &centim, &miles, &meters,
}
// ParseDistance attempts to parse a distance string and return distance in
// meters. Example formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters"
// If the unit cannot be determined, the entire string is parsed and the
// unit of meters is assumed.
// If the number portion cannot be parsed, 0 and the parse error are returned.
//
// Units are tried in the fixed order of distanceUnits and the first
// matching suffix wins, so the ordering of that slice is significant.
func ParseDistance(d string) (float64, error) {
	for _, unit := range distanceUnits {
		for _, unitSuffix := range unit.suffixes {
			if strings.HasSuffix(d, unitSuffix) {
				// strip the suffix and parse the numeric prefix
				parsedNum, err := strconv.ParseFloat(d[0:len(d)-len(unitSuffix)], 64)
				if err != nil {
					return 0, err
				}
				return parsedNum * unit.conv, nil
			}
		}
	}
	// no unit matched, try assuming meters?
	parsedNum, err := strconv.ParseFloat(d, 64)
	if err != nil {
		return 0, err
	}
	return parsedNum, nil
}
// ParseDistanceUnit attempts to parse a distance unit and return the
// multiplier for converting this to meters. If the unit cannot be parsed
// then 0 and the error message is returned. Unlike ParseDistance this
// requires an exact match on a unit suffix.
func ParseDistanceUnit(u string) (float64, error) {
	for _, unit := range distanceUnits {
		for _, unitSuffix := range unit.suffixes {
			if u == unitSuffix {
				return unit.conv, nil
			}
		}
	}
	return 0, fmt.Errorf("unknown distance unit: %s", u)
}
// Haversin computes the distance between two points.
// This implementation uses the sloppy math implementations which trade
// off accuracy for performance. The distance returned is in kilometers.
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
	x1 := lat1 * degreesToRadian
	x2 := lat2 * degreesToRadian
	h1 := 1 - cos(x1-x2)
	h2 := 1 - cos((lon1-lon2)*degreesToRadian)
	h := (h1 + cos(x1)*cos(x2)*h2) / 2
	avgLat := (x1 + x2) / 2
	// diameter of the earth (km) at the midpoint latitude
	diameter := earthDiameter(avgLat)
	return diameter * asin(math.Min(1, math.Sqrt(h)))
}
@@ -0,0 +1,140 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package geo | |||
import ( | |||
"reflect" | |||
"strings" | |||
) | |||
// ExtractGeoPoint takes an arbitrary interface{} and tries its best to
// interpret it as a geo point. Supported formats:
// Container:
// slice length 2 (GeoJSON)
// first element lon, second element lat
// map[string]interface{}
// exact keys lat and lon or lng
// struct
// w/exported fields case-insensitive match on lat and lon or lng
// struct
// satisfying the later and loner (or lnger) interfaces
//
// in all cases values must be some sort of numeric-like thing: int/uint/float
//
// NOTE(review): reflect.ValueOf(nil).Type() panics, so this assumes
// thing is never nil — confirm against callers.
func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
	var foundLon, foundLat bool
	thingVal := reflect.ValueOf(thing)
	thingTyp := thingVal.Type()
	// is it a slice
	if thingVal.IsValid() && thingVal.Kind() == reflect.Slice {
		// must be length 2
		if thingVal.Len() == 2 {
			first := thingVal.Index(0)
			if first.CanInterface() {
				firstVal := first.Interface()
				lon, foundLon = extractNumericVal(firstVal)
			}
			second := thingVal.Index(1)
			if second.CanInterface() {
				secondVal := second.Interface()
				lat, foundLat = extractNumericVal(secondVal)
			}
		}
	}
	// is it a map
	if l, ok := thing.(map[string]interface{}); ok {
		// "lon" is preferred; "lng" is only consulted when "lon" is absent
		if lval, ok := l["lon"]; ok {
			lon, foundLon = extractNumericVal(lval)
		} else if lval, ok := l["lng"]; ok {
			lon, foundLon = extractNumericVal(lval)
		}
		if lval, ok := l["lat"]; ok {
			lat, foundLat = extractNumericVal(lval)
		}
	}
	// now try reflection on struct fields
	if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
		for i := 0; i < thingVal.NumField(); i++ {
			fieldName := thingTyp.Field(i).Name
			// prefix match, so e.g. "Longitude" and "Latitude" qualify
			if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
				if thingVal.Field(i).CanInterface() {
					fieldVal := thingVal.Field(i).Interface()
					lon, foundLon = extractNumericVal(fieldVal)
				}
			}
			if strings.HasPrefix(strings.ToLower(fieldName), "lng") {
				if thingVal.Field(i).CanInterface() {
					fieldVal := thingVal.Field(i).Interface()
					lon, foundLon = extractNumericVal(fieldVal)
				}
			}
			if strings.HasPrefix(strings.ToLower(fieldName), "lat") {
				if thingVal.Field(i).CanInterface() {
					fieldVal := thingVal.Field(i).Interface()
					lat, foundLat = extractNumericVal(fieldVal)
				}
			}
		}
	}
	// last hope, some interfaces
	// lon
	if l, ok := thing.(loner); ok {
		lon = l.Lon()
		foundLon = true
	} else if l, ok := thing.(lnger); ok {
		lon = l.Lng()
		foundLon = true
	}
	// lat
	if l, ok := thing.(later); ok {
		lat = l.Lat()
		foundLat = true
	}
	return lon, lat, foundLon && foundLat
}
// extractNumericVal attempts to read v as a numeric value, returning it
// widened to float64 along with a success flag. Non-numeric kinds
// (including nil) report false.
func extractNumericVal(v interface{}) (float64, bool) {
	val := reflect.ValueOf(v)
	switch val.Kind() {
	case reflect.Float32, reflect.Float64:
		return val.Float(), true
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return float64(val.Int()), true
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return float64(val.Uint()), true
	default:
		return 0, false
	}
}
// various support interfaces which can be used to find lat/lon

// loner is satisfied by values exposing a longitude via Lon().
type loner interface {
	Lon() float64
}

// later is satisfied by values exposing a latitude via Lat().
type later interface {
	Lat() float64
}

// lnger is satisfied by values exposing a longitude via Lng().
type lnger interface {
	Lng() float64
}
@@ -0,0 +1,212 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package geo | |||
import ( | |||
"math" | |||
) | |||
// Lookup tables populated by init for the sloppy (fast, approximate)
// trig functions below.
var earthDiameterPerLatitude []float64
var sinTab []float64
var cosTab []float64
var asinTab []float64
var asinDer1DivF1Tab []float64
var asinDer2DivF2Tab []float64
var asinDer3DivF3Tab []float64
var asinDer4DivF4Tab []float64

// Table sizing constants.
const radiusTabsSize = (1 << 10) + 1
const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
const radiusIndexer = 1 / radiusDelta
const sinCosTabsSize = (1 << 11) + 1
const asinTabsSize = (1 << 13) + 1

// Reciprocal factorials (1/2!, 1/3!, 1/4!) used by the Taylor-style
// correction terms in cos and asin.
const oneDivF2 = 1 / 2.0
const oneDivF3 = 1 / 6.0
const oneDivF4 = 1 / 24.0

// 1.57079632673412561417e+00 first 33 bits of pi/2
var pio2Hi = math.Float64frombits(0x3FF921FB54400000)

// 6.07710050650619224932e-11 pi/2 - PIO2_HI
var pio2Lo = math.Float64frombits(0x3DD0B4611A626331)

// Bit-exact constants for the fdlibm-derived asin fallback path.
var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00
var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17
var asinPs0 = math.Float64frombits(0x3fc5555555555555)    // 1.66666666666666657415e-01
var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d)    // -3.25565818622400915405e-01
var asinPs2 = math.Float64frombits(0x3fc9c1550e884455)    // 2.01212532134862925881e-01
var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b)    // -4.00555345006794114027e-02
var asinPs4 = math.Float64frombits(0x3f49efe07501b288)    // 7.91534994289814532176e-04
var asinPs5 = math.Float64frombits(0x3f023de10dfdf709)    // 3.47933107596021167570e-05
var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b)    // -2.40339491173441421878e+00
var asinQs2 = math.Float64frombits(0x40002ae59c598ac8)    // 2.02094576023350569471e+00
var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159)    // -6.88283971605453293030e-01
var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282)    // 7.70381505559019352791e-02
var twoPiHi = 4 * pio2Hi | |||
var twoPiLo = 4 * pio2Lo | |||
var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1 | |||
var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1 | |||
var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo) | |||
var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99 | |||
var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian) | |||
var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1) | |||
var asinIndexer = 1 / asinDelta | |||
func init() { | |||
// initializes the tables used for the sloppy math functions | |||
// sin and cos | |||
sinTab = make([]float64, sinCosTabsSize) | |||
cosTab = make([]float64, sinCosTabsSize) | |||
sinCosPiIndex := (sinCosTabsSize - 1) / 2 | |||
sinCosPiMul2Index := 2 * sinCosPiIndex | |||
sinCosPiMul05Index := sinCosPiIndex / 2 | |||
sinCosPiMul15Index := 3 * sinCosPiIndex / 2 | |||
for i := 0; i < sinCosTabsSize; i++ { | |||
// angle: in [0,2*PI]. | |||
angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo | |||
sinAngle := math.Sin(angle) | |||
cosAngle := math.Cos(angle) | |||
// For indexes corresponding to null cosine or sine, we make sure the value is zero | |||
// and not an epsilon. This allows for a much better accuracy for results close to zero. | |||
if i == sinCosPiIndex { | |||
sinAngle = 0.0 | |||
} else if i == sinCosPiMul2Index { | |||
sinAngle = 0.0 | |||
} else if i == sinCosPiMul05Index { | |||
sinAngle = 0.0 | |||
} else if i == sinCosPiMul15Index { | |||
sinAngle = 0.0 | |||
} | |||
sinTab[i] = sinAngle | |||
cosTab[i] = cosAngle | |||
} | |||
// asin | |||
asinTab = make([]float64, asinTabsSize) | |||
asinDer1DivF1Tab = make([]float64, asinTabsSize) | |||
asinDer2DivF2Tab = make([]float64, asinTabsSize) | |||
asinDer3DivF3Tab = make([]float64, asinTabsSize) | |||
asinDer4DivF4Tab = make([]float64, asinTabsSize) | |||
for i := 0; i < asinTabsSize; i++ { | |||
// x: in [0,ASIN_MAX_VALUE_FOR_TABS]. | |||
x := float64(i) * asinDelta | |||
asinTab[i] = math.Asin(x) | |||
oneMinusXSqInv := 1.0 / (1 - x*x) | |||
oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv) | |||
oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv | |||
oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv | |||
oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv | |||
asinDer1DivF1Tab[i] = oneMinusXSqInv05 | |||
asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2 | |||
asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3 | |||
asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4 | |||
} | |||
// earth radius | |||
a := 6378137.0 | |||
b := 6356752.31420 | |||
a2 := a * a | |||
b2 := b * b | |||
earthDiameterPerLatitude = make([]float64, radiusTabsSize) | |||
earthDiameterPerLatitude[0] = 2.0 * a / 1000 | |||
earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000 | |||
for i := 1; i < radiusTabsSize-1; i++ { | |||
lat := math.Pi * float64(i) / (2*radiusTabsSize - 1) | |||
one := math.Pow(a2*math.Cos(lat), 2) | |||
two := math.Pow(b2*math.Sin(lat), 2) | |||
three := math.Pow(float64(a)*math.Cos(lat), 2) | |||
four := math.Pow(b*math.Sin(lat), 2) | |||
radius := math.Sqrt((one + two) / (three + four)) | |||
earthDiameterPerLatitude[i] = 2 * radius / 1000 | |||
} | |||
} | |||
// earthDiameter returns an estimation of the earth's diameter at the specified | |||
// latitude in kilometers | |||
func earthDiameter(lat float64) float64 { | |||
index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude))) | |||
if math.IsNaN(index) { | |||
return 0 | |||
} | |||
return earthDiameterPerLatitude[int(index)] | |||
} | |||
var pio2 = math.Pi / 2 | |||
func sin(a float64) float64 { | |||
return cos(a - pio2) | |||
} | |||
// cos is a sloppy math (faster) implementation of math.Cos | |||
func cos(a float64) float64 { | |||
if a < 0.0 { | |||
a = -a | |||
} | |||
if a > sinCosMaxValueForIntModulo { | |||
return math.Cos(a) | |||
} | |||
// index: possibly outside tables range. | |||
index := int(a*sinCosIndexer + 0.5) | |||
delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo | |||
// Making sure index is within tables range. | |||
// Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo. | |||
index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1) | |||
indexCos := cosTab[index] | |||
indexSin := sinTab[index] | |||
return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4))) | |||
} | |||
// asin is a sloppy math (faster) implementation of math.Asin | |||
func asin(a float64) float64 { | |||
var negateResult bool | |||
if a < 0 { | |||
a = -a | |||
negateResult = true | |||
} | |||
if a <= asinMaxValueForTabs { | |||
index := int(a*asinIndexer + 0.5) | |||
delta := a - float64(index)*asinDelta | |||
result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index]))) | |||
if negateResult { | |||
return -result | |||
} | |||
return result | |||
} | |||
// value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN | |||
// This part is derived from fdlibm. | |||
if a < 1 { | |||
t := (1.0 - a) * 0.5 | |||
p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t+asinPs5))))) | |||
q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4))) | |||
s := math.Sqrt(t) | |||
z := s + s*(p/q) | |||
result := asinPio2Hi - ((z + z) - asinPio2Lo) | |||
if negateResult { | |||
return -result | |||
} | |||
return result | |||
} | |||
// value >= 1.0, or value is NaN | |||
if a == 1.0 { | |||
if negateResult { | |||
return -math.Pi / 2 | |||
} | |||
return math.Pi / 2 | |||
} | |||
return math.NaN() | |||
} |
@@ -49,6 +49,17 @@ func (b *Batch) Index(id string, data interface{}) error { | |||
return nil | |||
} | |||
// IndexAdvanced adds the specified index operation to the | |||
// batch which skips the mapping. NOTE: the bleve Index is not updated | |||
// until the batch is executed. | |||
func (b *Batch) IndexAdvanced(doc *document.Document) (err error) { | |||
if doc.ID == "" { | |||
return ErrorEmptyID | |||
} | |||
b.internal.Update(doc) | |||
return nil | |||
} | |||
// Delete adds the specified delete operation to the | |||
// batch. NOTE: the bleve Index is not updated until | |||
// the batch is executed. | |||
@@ -99,12 +110,15 @@ func (b *Batch) Reset() { | |||
// them. | |||
// | |||
// The DocumentMapping used to index a value is deduced by the following rules: | |||
// 1) If value implements Classifier interface, resolve the mapping from Type(). | |||
// 2) If value has a string field or value at IndexMapping.TypeField. | |||
// 1) If value implements mapping.bleveClassifier interface, resolve the mapping | |||
// from BleveType(). | |||
// 2) If value implements mapping.Classifier interface, resolve the mapping | |||
// from Type(). | |||
// 3) If value has a string field or value at IndexMapping.TypeField. | |||
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing | |||
// is described below. | |||
// 3) If IndexMapping.DefaultType is registered, return it. | |||
// 4) Return IndexMapping.DefaultMapping. | |||
// 4) If IndexMapping.DefaultType is registered, return it. | |||
// 5) Return IndexMapping.DefaultMapping. | |||
// | |||
// Each field or nested field of the value is identified by a string path, then | |||
// mapped to one or several FieldMappings which extract the result for analysis. |
@@ -48,6 +48,8 @@ type Index interface { | |||
Advanced() (store.KVStore, error) | |||
} | |||
type DocumentFieldTermVisitor func(field string, term []byte) | |||
type IndexReader interface { | |||
TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error) | |||
@@ -64,7 +66,7 @@ type IndexReader interface { | |||
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) | |||
Document(id string) (*document.Document, error) | |||
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error) | |||
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error | |||
Fields() ([]string, error) | |||
@@ -90,7 +90,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult | |||
rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...) | |||
backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded) | |||
backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs)) | |||
// walk through the collated information and process | |||
// once for each indexed field (unique name) | |||
@@ -99,11 +99,11 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult | |||
includeTermVectors := fieldIncludeTermVectors[fieldIndex] | |||
// encode this field | |||
rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries) | |||
rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries) | |||
} | |||
// build the back index row | |||
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries) | |||
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries) | |||
rv.Rows = append(rv.Rows, backIndexRow) | |||
return rv |
@@ -127,10 +127,12 @@ func (i *IndexReader) DumpDoc(id string) chan interface{} { | |||
} | |||
// build sorted list of term keys | |||
keys := make(keyset, 0) | |||
for _, entry := range back.termEntries { | |||
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0) | |||
key := tfr.Key() | |||
keys = append(keys, key) | |||
for _, entry := range back.termsEntries { | |||
for i := range entry.Terms { | |||
tfr := NewTermFrequencyRow([]byte(entry.Terms[i]), uint16(*entry.Field), idBytes, 0, 0) | |||
key := tfr.Key() | |||
keys = append(keys, key) | |||
} | |||
} | |||
sort.Sort(keys) | |||
@@ -101,15 +101,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) { | |||
return | |||
} | |||
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { | |||
back, err := backIndexRowForDoc(i.kvreader, id) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if back == nil { | |||
return nil, nil | |||
} | |||
rv := make(index.FieldTerms, len(fields)) | |||
func (i *IndexReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error { | |||
fieldsMap := make(map[uint16]string, len(fields)) | |||
for _, f := range fields { | |||
id, ok := i.index.fieldCache.FieldNamed(f, false) | |||
@@ -117,12 +109,34 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri | |||
fieldsMap[id] = f | |||
} | |||
} | |||
for _, entry := range back.termEntries { | |||
if field, ok := fieldsMap[uint16(*entry.Field)]; ok { | |||
rv[field] = append(rv[field], *entry.Term) | |||
} | |||
tempRow := BackIndexRow{ | |||
doc: id, | |||
} | |||
keyBuf := GetRowBuffer() | |||
if tempRow.KeySize() > len(keyBuf) { | |||
keyBuf = make([]byte, 2*tempRow.KeySize()) | |||
} | |||
return rv, nil | |||
defer PutRowBuffer(keyBuf) | |||
keySize, err := tempRow.KeyTo(keyBuf) | |||
if err != nil { | |||
return err | |||
} | |||
value, err := i.kvreader.Get(keyBuf[:keySize]) | |||
if err != nil { | |||
return err | |||
} | |||
if value == nil { | |||
return nil | |||
} | |||
return visitBackIndexRow(value, func(field uint32, term []byte) { | |||
if field, ok := fieldsMap[uint16(field)]; ok { | |||
visitor(field, term) | |||
} | |||
}) | |||
} | |||
func (i *IndexReader) Fields() (fields []string, err error) { |
@@ -24,46 +24,57 @@ import ( | |||
) | |||
type UpsideDownCouchTermFieldReader struct { | |||
count uint64 | |||
indexReader *IndexReader | |||
iterator store.KVIterator | |||
term []byte | |||
tfrNext *TermFrequencyRow | |||
keyBuf []byte | |||
field uint16 | |||
count uint64 | |||
indexReader *IndexReader | |||
iterator store.KVIterator | |||
term []byte | |||
tfrNext *TermFrequencyRow | |||
tfrPrealloc TermFrequencyRow | |||
keyBuf []byte | |||
field uint16 | |||
includeTermVectors bool | |||
} | |||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { | |||
dictionaryRow := NewDictionaryRow(term, field, 0) | |||
val, err := indexReader.kvreader.Get(dictionaryRow.Key()) | |||
bufNeeded := termFrequencyRowKeySize(term, nil) | |||
if bufNeeded < dictionaryRowKeySize(term) { | |||
bufNeeded = dictionaryRowKeySize(term) | |||
} | |||
buf := make([]byte, bufNeeded) | |||
bufUsed := dictionaryRowKeyTo(buf, field, term) | |||
val, err := indexReader.kvreader.Get(buf[:bufUsed]) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if val == nil { | |||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) | |||
return &UpsideDownCouchTermFieldReader{ | |||
count: 0, | |||
term: term, | |||
tfrNext: &TermFrequencyRow{}, | |||
field: field, | |||
}, nil | |||
rv := &UpsideDownCouchTermFieldReader{ | |||
count: 0, | |||
term: term, | |||
field: field, | |||
includeTermVectors: includeTermVectors, | |||
} | |||
rv.tfrNext = &rv.tfrPrealloc | |||
return rv, nil | |||
} | |||
err = dictionaryRow.parseDictionaryV(val) | |||
count, err := dictionaryRowParseV(val) | |||
if err != nil { | |||
return nil, err | |||
} | |||
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0) | |||
it := indexReader.kvreader.PrefixIterator(tfr.Key()) | |||
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil) | |||
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed]) | |||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) | |||
return &UpsideDownCouchTermFieldReader{ | |||
indexReader: indexReader, | |||
iterator: it, | |||
count: dictionaryRow.count, | |||
term: term, | |||
field: field, | |||
indexReader: indexReader, | |||
iterator: it, | |||
count: count, | |||
term: term, | |||
field: field, | |||
includeTermVectors: includeTermVectors, | |||
}, nil | |||
} | |||
@@ -79,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* | |||
if r.tfrNext != nil { | |||
r.iterator.Next() | |||
} else { | |||
r.tfrNext = &TermFrequencyRow{} | |||
r.tfrNext = &r.tfrPrealloc | |||
} | |||
key, val, valid := r.iterator.Current() | |||
if valid { | |||
@@ -88,7 +99,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = tfr.parseV(val) | |||
err = tfr.parseV(val, r.includeTermVectors) | |||
if err != nil { | |||
return nil, err | |||
} | |||
@@ -125,7 +136,7 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, pr | |||
if err != nil { | |||
return nil, err | |||
} | |||
err = tfr.parseV(val) | |||
err = tfr.parseV(val, r.includeTermVectors) | |||
if err != nil { | |||
return nil, err | |||
} |
@@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte { | |||
} | |||
func (dr *DictionaryRow) KeySize() int { | |||
return len(dr.term) + 3 | |||
return dictionaryRowKeySize(dr.term) | |||
} | |||
func dictionaryRowKeySize(term []byte) int { | |||
return len(term) + 3 | |||
} | |||
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) { | |||
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil | |||
} | |||
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int { | |||
buf[0] = 'd' | |||
binary.LittleEndian.PutUint16(buf[1:3], dr.field) | |||
size := copy(buf[3:], dr.term) | |||
return size + 3, nil | |||
binary.LittleEndian.PutUint16(buf[1:3], field) | |||
size := copy(buf[3:], term) | |||
return size + 3 | |||
} | |||
func (dr *DictionaryRow) Value() []byte { | |||
@@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error { | |||
} | |||
func (dr *DictionaryRow) parseDictionaryV(value []byte) error { | |||
count, nread := binary.Uvarint(value) | |||
if nread <= 0 { | |||
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) | |||
count, err := dictionaryRowParseV(value) | |||
if err != nil { | |||
return err | |||
} | |||
dr.count = count | |||
return nil | |||
} | |||
func dictionaryRowParseV(value []byte) (uint64, error) { | |||
count, nread := binary.Uvarint(value) | |||
if nread <= 0 { | |||
return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) | |||
} | |||
return count, nil | |||
} | |||
// TERM FIELD FREQUENCY | |||
type TermVector struct { | |||
@@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte { | |||
} | |||
func (tfr *TermFrequencyRow) KeySize() int { | |||
return 3 + len(tfr.term) + 1 + len(tfr.doc) | |||
return termFrequencyRowKeySize(tfr.term, tfr.doc) | |||
} | |||
func termFrequencyRowKeySize(term, doc []byte) int { | |||
return 3 + len(term) + 1 + len(doc) | |||
} | |||
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { | |||
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil | |||
} | |||
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int { | |||
buf[0] = 't' | |||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) | |||
termLen := copy(buf[3:], tfr.term) | |||
binary.LittleEndian.PutUint16(buf[1:3], field) | |||
termLen := copy(buf[3:], term) | |||
buf[3+termLen] = ByteSeparator | |||
docLen := copy(buf[3+termLen+1:], tfr.doc) | |||
return 3 + termLen + 1 + docLen, nil | |||
docLen := copy(buf[3+termLen+1:], doc) | |||
return 3 + termLen + 1 + docLen | |||
} | |||
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) { | |||
@@ -538,7 +562,7 @@ func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error { | |||
return nil | |||
} | |||
func (tfr *TermFrequencyRow) parseV(value []byte) error { | |||
func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error { | |||
var bytesRead int | |||
tfr.freq, bytesRead = binary.Uvarint(value) | |||
if bytesRead <= 0 { | |||
@@ -556,6 +580,10 @@ func (tfr *TermFrequencyRow) parseV(value []byte) error { | |||
tfr.norm = math.Float32frombits(uint32(norm)) | |||
tfr.vectors = nil | |||
if !includeTermVectors { | |||
return nil | |||
} | |||
var field uint64 | |||
field, bytesRead = binary.Uvarint(value[currOffset:]) | |||
for bytesRead > 0 { | |||
@@ -620,7 +648,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { | |||
return nil, err | |||
} | |||
err = rv.parseV(value) | |||
err = rv.parseV(value, true) | |||
if err != nil { | |||
return nil, err | |||
} | |||
@@ -630,7 +658,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { | |||
type BackIndexRow struct { | |||
doc []byte | |||
termEntries []*BackIndexTermEntry | |||
termsEntries []*BackIndexTermsEntry | |||
storedEntries []*BackIndexStoreEntry | |||
} | |||
@@ -638,10 +666,12 @@ func (br *BackIndexRow) AllTermKeys() [][]byte { | |||
if br == nil { | |||
return nil | |||
} | |||
rv := make([][]byte, len(br.termEntries)) | |||
for i, termEntry := range br.termEntries { | |||
termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0) | |||
rv[i] = termRow.Key() | |||
rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely | |||
for _, termsEntry := range br.termsEntries { | |||
for i := range termsEntry.Terms { | |||
termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0) | |||
rv = append(rv, termRow.Key()) | |||
} | |||
} | |||
return rv | |||
} | |||
@@ -682,7 +712,7 @@ func (br *BackIndexRow) Value() []byte { | |||
func (br *BackIndexRow) ValueSize() int { | |||
birv := &BackIndexRowValue{ | |||
TermEntries: br.termEntries, | |||
TermsEntries: br.termsEntries, | |||
StoredEntries: br.storedEntries, | |||
} | |||
return birv.Size() | |||
@@ -690,20 +720,20 @@ func (br *BackIndexRow) ValueSize() int { | |||
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { | |||
birv := &BackIndexRowValue{ | |||
TermEntries: br.termEntries, | |||
TermsEntries: br.termsEntries, | |||
StoredEntries: br.storedEntries, | |||
} | |||
return birv.MarshalTo(buf) | |||
} | |||
func (br *BackIndexRow) String() string { | |||
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries) | |||
return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries) | |||
} | |||
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { | |||
func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { | |||
return &BackIndexRow{ | |||
doc: docID, | |||
termEntries: entries, | |||
termsEntries: entries, | |||
storedEntries: storedFields, | |||
} | |||
} | |||
@@ -732,7 +762,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) { | |||
if err != nil { | |||
return nil, err | |||
} | |||
rv.termEntries = birv.TermEntries | |||
rv.termsEntries = birv.TermsEntries | |||
rv.storedEntries = birv.StoredEntries | |||
return &rv, nil | |||
@@ -851,3 +881,232 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) { | |||
rv.value = value[1:] | |||
return rv, nil | |||
} | |||
type backIndexFieldTermVisitor func(field uint32, term []byte) | |||
// visitBackIndexRow is designed to process a protobuf encoded | |||
// value, without creating unnecessary garbage. Instead values are passed | |||
// to a callback, inspected first, and only copied if necessary. | |||
// Due to the fact that this borrows from generated code, it must be manually | |||
// updated if the protobuf definition changes. | |||
// | |||
// This code originates from: | |||
// func (m *BackIndexRowValue) Unmarshal(data []byte) error | |||
// the sections which create garbage or parse uninteresting sections | |||
// have been commented out. This was done by design to allow for easier | |||
// merging in the future if that original function is regenerated | |||
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error { | |||
l := len(data) | |||
iNdEx := 0 | |||
for iNdEx < l { | |||
var wire uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
wire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
fieldNum := int32(wire >> 3) | |||
wireType := int(wire & 0x7) | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType) | |||
} | |||
var msglen int | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
msglen |= (int(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + msglen | |||
if msglen < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
// dont parse term entries | |||
// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{}) | |||
// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { | |||
// return err | |||
// } | |||
// instead, inspect them | |||
if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil { | |||
return err | |||
} | |||
iNdEx = postIndex | |||
case 2: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType) | |||
} | |||
var msglen int | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
msglen |= (int(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + msglen | |||
if msglen < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
// don't parse stored entries | |||
// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{}) | |||
// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { | |||
// return err | |||
// } | |||
iNdEx = postIndex | |||
default: | |||
var sizeOfWire int | |||
for { | |||
sizeOfWire++ | |||
wire >>= 7 | |||
if wire == 0 { | |||
break | |||
} | |||
} | |||
iNdEx -= sizeOfWire | |||
skippy, err := skipUpsidedown(data[iNdEx:]) | |||
if err != nil { | |||
return err | |||
} | |||
if skippy < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if (iNdEx + skippy) > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
// don't track unrecognized data | |||
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) | |||
iNdEx += skippy | |||
} | |||
} | |||
return nil | |||
} | |||
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded | |||
// sub-value within the BackIndexRowValue, without creating unnecessary garbage. | |||
// Instead values are passed to a callback, inspected first, and only copied if | |||
// necessary. Due to the fact that this borrows from generated code, it must | |||
// be manually updated if the protobuf definition changes. | |||
// | |||
// This code originates from: | |||
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error { | |||
// the sections which create garbage or parse uninteresting sections | |||
// have been commented out. This was done by design to allow for easier | |||
// merging in the future if that original function is regenerated | |||
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error { | |||
var theField uint32 | |||
var hasFields [1]uint64 | |||
l := len(data) | |||
iNdEx := 0 | |||
for iNdEx < l { | |||
var wire uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
wire |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
fieldNum := int32(wire >> 3) | |||
wireType := int(wire & 0x7) | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 0 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) | |||
} | |||
var v uint32 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
v |= (uint32(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
// m.Field = &v | |||
theField = v | |||
hasFields[0] |= uint64(0x00000001) | |||
case 2: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType) | |||
} | |||
var stringLen uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
stringLen |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + int(stringLen) | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
//m.Terms = append(m.Terms, string(data[iNdEx:postIndex])) | |||
callback(theField, data[iNdEx:postIndex]) | |||
iNdEx = postIndex | |||
default: | |||
var sizeOfWire int | |||
for { | |||
sizeOfWire++ | |||
wire >>= 7 | |||
if wire == 0 { | |||
break | |||
} | |||
} | |||
iNdEx -= sizeOfWire | |||
skippy, err := skipUpsidedown(data[iNdEx:]) | |||
if err != nil { | |||
return err | |||
} | |||
if skippy < 0 { | |||
return ErrInvalidLengthUpsidedown | |||
} | |||
if (iNdEx + skippy) > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) | |||
iNdEx += skippy | |||
} | |||
} | |||
// if hasFields[0]&uint64(0x00000001) == 0 { | |||
// return new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
// } | |||
return nil | |||
} |
@@ -45,7 +45,7 @@ const RowBufferSize = 4 * 1024 | |||
var VersionKey = []byte{'v'} | |||
const Version uint8 = 5 | |||
const Version uint8 = 7 | |||
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version) | |||
@@ -499,44 +499,65 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { | |||
func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) { | |||
addRows = make([]UpsideDownCouchRow, 0, len(rows)) | |||
if backIndexRow == nil { | |||
addRows = addRows[0:len(rows)] | |||
for i, row := range rows { | |||
addRows[i] = row | |||
} | |||
return addRows, nil, nil | |||
} | |||
updateRows = make([]UpsideDownCouchRow, 0, len(rows)) | |||
deleteRows = make([]UpsideDownCouchRow, 0, len(rows)) | |||
existingTermKeys := make(map[string]bool) | |||
for _, key := range backIndexRow.AllTermKeys() { | |||
existingTermKeys[string(key)] = true | |||
var existingTermKeys map[string]struct{} | |||
backIndexTermKeys := backIndexRow.AllTermKeys() | |||
if len(backIndexTermKeys) > 0 { | |||
existingTermKeys = make(map[string]struct{}, len(backIndexTermKeys)) | |||
for _, key := range backIndexTermKeys { | |||
existingTermKeys[string(key)] = struct{}{} | |||
} | |||
} | |||
existingStoredKeys := make(map[string]bool) | |||
for _, key := range backIndexRow.AllStoredKeys() { | |||
existingStoredKeys[string(key)] = true | |||
var existingStoredKeys map[string]struct{} | |||
backIndexStoredKeys := backIndexRow.AllStoredKeys() | |||
if len(backIndexStoredKeys) > 0 { | |||
existingStoredKeys = make(map[string]struct{}, len(backIndexStoredKeys)) | |||
for _, key := range backIndexStoredKeys { | |||
existingStoredKeys[string(key)] = struct{}{} | |||
} | |||
} | |||
keyBuf := GetRowBuffer() | |||
for _, row := range rows { | |||
switch row := row.(type) { | |||
case *TermFrequencyRow: | |||
if row.KeySize() > len(keyBuf) { | |||
keyBuf = make([]byte, row.KeySize()) | |||
} | |||
keySize, _ := row.KeyTo(keyBuf) | |||
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { | |||
updateRows = append(updateRows, row) | |||
delete(existingTermKeys, string(keyBuf[:keySize])) | |||
} else { | |||
addRows = append(addRows, row) | |||
if existingTermKeys != nil { | |||
if row.KeySize() > len(keyBuf) { | |||
keyBuf = make([]byte, row.KeySize()) | |||
} | |||
keySize, _ := row.KeyTo(keyBuf) | |||
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { | |||
updateRows = append(updateRows, row) | |||
delete(existingTermKeys, string(keyBuf[:keySize])) | |||
continue | |||
} | |||
} | |||
addRows = append(addRows, row) | |||
case *StoredRow: | |||
if row.KeySize() > len(keyBuf) { | |||
keyBuf = make([]byte, row.KeySize()) | |||
} | |||
keySize, _ := row.KeyTo(keyBuf) | |||
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { | |||
updateRows = append(updateRows, row) | |||
delete(existingStoredKeys, string(keyBuf[:keySize])) | |||
} else { | |||
addRows = append(addRows, row) | |||
if existingStoredKeys != nil { | |||
if row.KeySize() > len(keyBuf) { | |||
keyBuf = make([]byte, row.KeySize()) | |||
} | |||
keySize, _ := row.KeyTo(keyBuf) | |||
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { | |||
updateRows = append(updateRows, row) | |||
delete(existingStoredKeys, string(keyBuf[:keySize])) | |||
continue | |||
} | |||
} | |||
addRows = append(addRows, row) | |||
default: | |||
updateRows = append(updateRows, row) | |||
} | |||
@@ -583,33 +604,41 @@ func encodeFieldType(f document.Field) byte { | |||
fieldType = 'd' | |||
case *document.BooleanField: | |||
fieldType = 'b' | |||
case *document.GeoPointField: | |||
fieldType = 'g' | |||
case *document.CompositeField: | |||
fieldType = 'c' | |||
} | |||
return fieldType | |||
} | |||
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) { | |||
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) { | |||
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) | |||
termFreqRows := make([]TermFrequencyRow, len(tokenFreqs)) | |||
termFreqRowsUsed := 0 | |||
terms := make([]string, 0, len(tokenFreqs)) | |||
for k, tf := range tokenFreqs { | |||
var termFreqRow *TermFrequencyRow | |||
termFreqRow := &termFreqRows[termFreqRowsUsed] | |||
termFreqRowsUsed++ | |||
InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID, | |||
uint64(frequencyFromTokenFreq(tf)), fieldNorm) | |||
if includeTermVectors { | |||
var tv []*TermVector | |||
tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows) | |||
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv) | |||
} else { | |||
termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm) | |||
termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows) | |||
} | |||
// record the back index entry | |||
backIndexTermEntry := BackIndexTermEntry{Term: proto.String(k), Field: proto.Uint32(uint32(fieldIndex))} | |||
backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry) | |||
terms = append(terms, k) | |||
rows = append(rows, termFreqRow) | |||
} | |||
backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms} | |||
backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry) | |||
return rows, backIndexTermEntries | |||
return rows, backIndexTermsEntries | |||
} | |||
func (udc *UpsideDownCouch) Delete(id string) (err error) { | |||
@@ -682,9 +711,11 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) { | |||
func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow { | |||
idBytes := []byte(id) | |||
for _, backIndexEntry := range backIndexRow.termEntries { | |||
tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0) | |||
deleteRows = append(deleteRows, tfr) | |||
for _, backIndexEntry := range backIndexRow.termsEntries { | |||
for i := range backIndexEntry.Terms { | |||
tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0) | |||
deleteRows = append(deleteRows, tfr) | |||
} | |||
} | |||
for _, se := range backIndexRow.storedEntries { | |||
sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil) | |||
@@ -706,6 +737,8 @@ func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document | |||
return document.NewDateTimeFieldFromBytes(name, pos, value) | |||
case 'b': | |||
return document.NewBooleanFieldFromBytes(name, pos, value) | |||
case 'g': | |||
return document.NewGeoPointFieldFromBytes(name, pos, value) | |||
} | |||
return nil | |||
} | |||
@@ -715,6 +748,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int { | |||
} | |||
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) { | |||
a := make([]TermVector, len(tf.Locations)) | |||
rv := make([]*TermVector, len(tf.Locations)) | |||
for i, l := range tf.Locations { | |||
@@ -727,14 +761,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. | |||
rows = append(rows, newFieldRow) | |||
} | |||
} | |||
tv := TermVector{ | |||
a[i] = TermVector{ | |||
field: fieldIndex, | |||
arrayPositions: l.ArrayPositions, | |||
pos: uint64(l.Position), | |||
start: uint64(l.Start), | |||
end: uint64(l.End), | |||
} | |||
rv[i] = &tv | |||
rv[i] = &a[i] | |||
} | |||
return rv, rows | |||
@@ -745,18 +779,19 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) [] | |||
return nil | |||
} | |||
a := make([]index.TermFieldVector, len(in)) | |||
rv := make([]*index.TermFieldVector, len(in)) | |||
for i, tv := range in { | |||
fieldName := udc.fieldCache.FieldIndexed(tv.field) | |||
tfv := index.TermFieldVector{ | |||
a[i] = index.TermFieldVector{ | |||
Field: fieldName, | |||
ArrayPositions: tv.arrayPositions, | |||
Pos: tv.pos, | |||
Start: tv.start, | |||
End: tv.end, | |||
} | |||
rv[i] = &tfv | |||
rv[i] = &a[i] | |||
} | |||
return rv | |||
} | |||
@@ -1008,7 +1043,7 @@ func init() { | |||
func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) { | |||
// use a temporary row structure to build key | |||
tempRow := &BackIndexRow{ | |||
tempRow := BackIndexRow{ | |||
doc: docID, | |||
} | |||
@@ -3,15 +3,15 @@ | |||
// DO NOT EDIT! | |||
/* | |||
Package upsidedown is a generated protocol buffer package. | |||
Package upsidedown is a generated protocol buffer package. | |||
It is generated from these files: | |||
upsidedown.proto | |||
It is generated from these files: | |||
upsidedown.proto | |||
It has these top-level messages: | |||
BackIndexTermEntry | |||
BackIndexStoreEntry | |||
BackIndexRowValue | |||
It has these top-level messages: | |||
BackIndexTermsEntry | |||
BackIndexStoreEntry | |||
BackIndexRowValue | |||
*/ | |||
package upsidedown | |||
@@ -26,30 +26,30 @@ import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" | |||
var _ = proto.Marshal | |||
var _ = math.Inf | |||
type BackIndexTermEntry struct { | |||
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"` | |||
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"` | |||
XXX_unrecognized []byte `json:"-"` | |||
type BackIndexTermsEntry struct { | |||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` | |||
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"` | |||
XXX_unrecognized []byte `json:"-"` | |||
} | |||
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} } | |||
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) } | |||
func (*BackIndexTermEntry) ProtoMessage() {} | |||
func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} } | |||
func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) } | |||
func (*BackIndexTermsEntry) ProtoMessage() {} | |||
func (m *BackIndexTermEntry) GetTerm() string { | |||
if m != nil && m.Term != nil { | |||
return *m.Term | |||
} | |||
return "" | |||
} | |||
func (m *BackIndexTermEntry) GetField() uint32 { | |||
func (m *BackIndexTermsEntry) GetField() uint32 { | |||
if m != nil && m.Field != nil { | |||
return *m.Field | |||
} | |||
return 0 | |||
} | |||
func (m *BackIndexTermsEntry) GetTerms() []string { | |||
if m != nil { | |||
return m.Terms | |||
} | |||
return nil | |||
} | |||
type BackIndexStoreEntry struct { | |||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` | |||
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"` | |||
@@ -75,7 +75,7 @@ func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 { | |||
} | |||
type BackIndexRowValue struct { | |||
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"` | |||
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"` | |||
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"` | |||
XXX_unrecognized []byte `json:"-"` | |||
} | |||
@@ -84,9 +84,9 @@ func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} } | |||
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) } | |||
func (*BackIndexRowValue) ProtoMessage() {} | |||
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry { | |||
func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry { | |||
if m != nil { | |||
return m.TermEntries | |||
return m.TermsEntries | |||
} | |||
return nil | |||
} | |||
@@ -98,7 +98,7 @@ func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry { | |||
return nil | |||
} | |||
func (m *BackIndexTermEntry) Unmarshal(data []byte) error { | |||
func (m *BackIndexTermsEntry) Unmarshal(data []byte) error { | |||
var hasFields [1]uint64 | |||
l := len(data) | |||
iNdEx := 0 | |||
@@ -119,47 +119,45 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error { | |||
wireType := int(wire & 0x7) | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) | |||
if wireType != 0 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) | |||
} | |||
var stringLen uint64 | |||
var v uint32 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
stringLen |= (uint64(b) & 0x7F) << shift | |||
v |= (uint32(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
postIndex := iNdEx + int(stringLen) | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
s := string(data[iNdEx:postIndex]) | |||
m.Term = &s | |||
iNdEx = postIndex | |||
m.Field = &v | |||
hasFields[0] |= uint64(0x00000001) | |||
case 2: | |||
if wireType != 0 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType) | |||
} | |||
var v uint32 | |||
var stringLen uint64 | |||
for shift := uint(0); ; shift += 7 { | |||
if iNdEx >= l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
b := data[iNdEx] | |||
iNdEx++ | |||
v |= (uint32(b) & 0x7F) << shift | |||
stringLen |= (uint64(b) & 0x7F) << shift | |||
if b < 0x80 { | |||
break | |||
} | |||
} | |||
m.Field = &v | |||
hasFields[0] |= uint64(0x00000002) | |||
postIndex := iNdEx + int(stringLen) | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.Terms = append(m.Terms, string(data[iNdEx:postIndex])) | |||
iNdEx = postIndex | |||
default: | |||
var sizeOfWire int | |||
for { | |||
@@ -187,9 +185,6 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error { | |||
if hasFields[0]&uint64(0x00000001) == 0 { | |||
return new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} | |||
if hasFields[0]&uint64(0x00000002) == 0 { | |||
return new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} | |||
return nil | |||
} | |||
@@ -299,7 +294,7 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error { | |||
switch fieldNum { | |||
case 1: | |||
if wireType != 2 { | |||
return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType) | |||
return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType) | |||
} | |||
var msglen int | |||
for shift := uint(0); ; shift += 7 { | |||
@@ -320,8 +315,8 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error { | |||
if postIndex > l { | |||
return io.ErrUnexpectedEOF | |||
} | |||
m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{}) | |||
if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { | |||
m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{}) | |||
if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { | |||
return err | |||
} | |||
iNdEx = postIndex | |||
@@ -472,16 +467,18 @@ var ( | |||
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling") | |||
) | |||
func (m *BackIndexTermEntry) Size() (n int) { | |||
func (m *BackIndexTermsEntry) Size() (n int) { | |||
var l int | |||
_ = l | |||
if m.Term != nil { | |||
l = len(*m.Term) | |||
n += 1 + l + sovUpsidedown(uint64(l)) | |||
} | |||
if m.Field != nil { | |||
n += 1 + sovUpsidedown(uint64(*m.Field)) | |||
} | |||
if len(m.Terms) > 0 { | |||
for _, s := range m.Terms { | |||
l = len(s) | |||
n += 1 + l + sovUpsidedown(uint64(l)) | |||
} | |||
} | |||
if m.XXX_unrecognized != nil { | |||
n += len(m.XXX_unrecognized) | |||
} | |||
@@ -508,8 +505,8 @@ func (m *BackIndexStoreEntry) Size() (n int) { | |||
func (m *BackIndexRowValue) Size() (n int) { | |||
var l int | |||
_ = l | |||
if len(m.TermEntries) > 0 { | |||
for _, e := range m.TermEntries { | |||
if len(m.TermsEntries) > 0 { | |||
for _, e := range m.TermsEntries { | |||
l = e.Size() | |||
n += 1 + l + sovUpsidedown(uint64(l)) | |||
} | |||
@@ -539,7 +536,7 @@ func sovUpsidedown(x uint64) (n int) { | |||
func sozUpsidedown(x uint64) (n int) { | |||
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63)))) | |||
} | |||
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) { | |||
func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) { | |||
size := m.Size() | |||
data = make([]byte, size) | |||
n, err := m.MarshalTo(data) | |||
@@ -549,26 +546,33 @@ func (m *BackIndexTermEntry) Marshal() (data []byte, err error) { | |||
return data[:n], nil | |||
} | |||
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) { | |||
func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) { | |||
var i int | |||
_ = i | |||
var l int | |||
_ = l | |||
if m.Term == nil { | |||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} else { | |||
data[i] = 0xa | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term))) | |||
i += copy(data[i:], *m.Term) | |||
} | |||
if m.Field == nil { | |||
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) | |||
} else { | |||
data[i] = 0x10 | |||
data[i] = 0x8 | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(*m.Field)) | |||
} | |||
if len(m.Terms) > 0 { | |||
for _, s := range m.Terms { | |||
data[i] = 0x12 | |||
i++ | |||
l = len(s) | |||
for l >= 1<<7 { | |||
data[i] = uint8(uint64(l)&0x7f | 0x80) | |||
l >>= 7 | |||
i++ | |||
} | |||
data[i] = uint8(l) | |||
i++ | |||
i += copy(data[i:], s) | |||
} | |||
} | |||
if m.XXX_unrecognized != nil { | |||
i += copy(data[i:], m.XXX_unrecognized) | |||
} | |||
@@ -625,8 +629,8 @@ func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) { | |||
_ = i | |||
var l int | |||
_ = l | |||
if len(m.TermEntries) > 0 { | |||
for _, msg := range m.TermEntries { | |||
if len(m.TermsEntries) > 0 { | |||
for _, msg := range m.TermsEntries { | |||
data[i] = 0xa | |||
i++ | |||
i = encodeVarintUpsidedown(data, i, uint64(msg.Size())) |
@@ -1,6 +1,6 @@ | |||
message BackIndexTermEntry { | |||
required string term = 1; | |||
required uint32 field = 2; | |||
message BackIndexTermsEntry { | |||
required uint32 field = 1; | |||
repeated string terms = 2; | |||
} | |||
message BackIndexStoreEntry { | |||
@@ -9,6 +9,6 @@ message BackIndexStoreEntry { | |||
} | |||
message BackIndexRowValue { | |||
repeated BackIndexTermEntry termEntries = 1; | |||
repeated BackIndexTermsEntry termsEntries = 1; | |||
repeated BackIndexStoreEntry storedEntries = 2; | |||
} |
@@ -425,14 +425,15 @@ func (i *indexAliasImpl) Swap(in, out []Index) { | |||
// could be slower in remote usages. | |||
func createChildSearchRequest(req *SearchRequest) *SearchRequest { | |||
rv := SearchRequest{ | |||
Query: req.Query, | |||
Size: req.Size + req.From, | |||
From: 0, | |||
Highlight: req.Highlight, | |||
Fields: req.Fields, | |||
Facets: req.Facets, | |||
Explain: req.Explain, | |||
Sort: req.Sort, | |||
Query: req.Query, | |||
Size: req.Size + req.From, | |||
From: 0, | |||
Highlight: req.Highlight, | |||
Fields: req.Fields, | |||
Facets: req.Facets, | |||
Explain: req.Explain, | |||
Sort: req.Sort.Copy(), | |||
IncludeLocations: req.IncludeLocations, | |||
} | |||
return &rv | |||
} |
@@ -253,6 +253,24 @@ func (i *indexImpl) Index(id string, data interface{}) (err error) { | |||
return | |||
} | |||
// IndexAdvanced takes a document.Document object | |||
// skips the mapping and indexes it. | |||
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) { | |||
if doc.ID == "" { | |||
return ErrorEmptyID | |||
} | |||
i.mutex.RLock() | |||
defer i.mutex.RUnlock() | |||
if !i.open { | |||
return ErrorIndexClosed | |||
} | |||
err = i.i.Update(doc) | |||
return | |||
} | |||
// Delete entries for the specified identifier from | |||
// the index. | |||
func (i *indexImpl) Delete(id string) (err error) { | |||
@@ -370,7 +388,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
} | |||
}() | |||
searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain) | |||
searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ | |||
Explain: req.Explain, | |||
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, | |||
}) | |||
if err != nil { | |||
return nil, err | |||
} | |||
@@ -461,6 +482,14 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||
if err == nil { | |||
value = boolean | |||
} | |||
case *document.GeoPointField: | |||
lon, err := docF.Lon() | |||
if err == nil { | |||
lat, err := docF.Lat() | |||
if err == nil { | |||
value = []float64{lon, lat} | |||
} | |||
} | |||
} | |||
if value != nil { | |||
hit.AddFieldValue(docF.Name(), value) |
@@ -59,3 +59,7 @@ func NewDateTimeFieldMapping() *mapping.FieldMapping { | |||
func NewBooleanFieldMapping() *mapping.FieldMapping { | |||
return mapping.NewBooleanFieldMapping() | |||
} | |||
func NewGeoPointFieldMapping() *mapping.FieldMapping { | |||
return mapping.NewGeoPointFieldMapping() | |||
} |
@@ -15,6 +15,7 @@ | |||
package mapping | |||
import ( | |||
"encoding" | |||
"encoding/json" | |||
"fmt" | |||
"reflect" | |||
@@ -75,7 +76,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error { | |||
} | |||
} | |||
switch field.Type { | |||
case "text", "datetime", "number", "boolean": | |||
case "text", "datetime", "number", "boolean", "geopoint": | |||
default: | |||
return fmt.Errorf("unknown field type: '%s'", field.Type) | |||
} | |||
@@ -481,9 +482,57 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, | |||
fieldMapping := newDateTimeFieldMappingDynamic(context.im) | |||
fieldMapping.processTime(property, pathString, path, indexes, context) | |||
} | |||
case encoding.TextMarshaler: | |||
txt, err := property.MarshalText() | |||
if err == nil && subDocMapping != nil { | |||
// index by explicit mapping | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
if fieldMapping.Type == "text" { | |||
fieldMapping.processString(string(txt), pathString, path, indexes, context) | |||
} | |||
} | |||
} | |||
dm.walkDocument(property, path, indexes, context) | |||
default: | |||
if subDocMapping != nil { | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
if fieldMapping.Type == "geopoint" { | |||
fieldMapping.processGeoPoint(property, pathString, path, indexes, context) | |||
} | |||
} | |||
} | |||
dm.walkDocument(property, path, indexes, context) | |||
} | |||
case reflect.Map: | |||
if subDocMapping != nil { | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
if fieldMapping.Type == "geopoint" { | |||
fieldMapping.processGeoPoint(property, pathString, path, indexes, context) | |||
} | |||
} | |||
} | |||
dm.walkDocument(property, path, indexes, context) | |||
case reflect.Ptr: | |||
if !propertyValue.IsNil() { | |||
switch property := property.(type) { | |||
case encoding.TextMarshaler: | |||
txt, err := property.MarshalText() | |||
if err == nil && subDocMapping != nil { | |||
// index by explicit mapping | |||
for _, fieldMapping := range subDocMapping.Fields { | |||
if fieldMapping.Type == "text" { | |||
fieldMapping.processString(string(txt), pathString, path, indexes, context) | |||
} | |||
} | |||
} else { | |||
dm.walkDocument(property, path, indexes, context) | |||
} | |||
default: | |||
dm.walkDocument(property, path, indexes, context) | |||
} | |||
} | |||
default: | |||
dm.walkDocument(property, path, indexes, context) | |||
} |
@@ -21,6 +21,7 @@ import ( | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/document" | |||
"github.com/blevesearch/bleve/geo" | |||
) | |||
// control the default behavior for dynamic fields (those not explicitly mapped) | |||
@@ -124,6 +125,16 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { | |||
return rv | |||
} | |||
// NewGeoPointFieldMapping returns a default field mapping for geo points | |||
func NewGeoPointFieldMapping() *FieldMapping { | |||
return &FieldMapping{ | |||
Type: "geopoint", | |||
Store: true, | |||
Index: true, | |||
IncludeInAll: true, | |||
} | |||
} | |||
// Options returns the indexing options for this field. | |||
func (fm *FieldMapping) Options() document.IndexingOptions { | |||
var rv document.IndexingOptions | |||
@@ -208,6 +219,20 @@ func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string | |||
} | |||
} | |||
func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) { | |||
lon, lat, found := geo.ExtractGeoPoint(propertyMightBeGeoPoint) | |||
if found { | |||
fieldName := getFieldName(pathString, path, fm) | |||
options := fm.Options() | |||
field := document.NewGeoPointFieldWithIndexingOptions(fieldName, indexes, lon, lat, options) | |||
context.doc.AddField(field) | |||
if !fm.IncludeInAll { | |||
context.excludedFromAll = append(context.excludedFromAll, fieldName) | |||
} | |||
} | |||
} | |||
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { | |||
analyzerName := fm.Analyzer | |||
if analyzerName == "" { |
@@ -289,7 +289,12 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { | |||
} | |||
func (im *IndexMappingImpl) determineType(data interface{}) string { | |||
// first see if the object implements Classifier | |||
// first see if the object implements bleveClassifier | |||
bleveClassifier, ok := data.(bleveClassifier) | |||
if ok { | |||
return bleveClassifier.BleveType() | |||
} | |||
// next see if the object implements Classifier | |||
classifier, ok := data.(Classifier) | |||
if ok { | |||
return classifier.Type() |
@@ -22,12 +22,21 @@ import ( | |||
"github.com/blevesearch/bleve/document" | |||
) | |||
// A Classifier is an interface describing any object | |||
// which knows how to identify its own type. | |||
// A Classifier is an interface describing any object which knows how to | |||
// identify its own type. Alternatively, if a struct already has a Type | |||
// field or method in conflict, one can use BleveType instead. | |||
type Classifier interface { | |||
Type() string | |||
} | |||
// A bleveClassifier is an interface describing any object which knows how | |||
// to identify its own type. This is introduced as an alternative to the | |||
// Classifier interface which often has naming conflicts with existing | |||
// structures. | |||
type bleveClassifier interface { | |||
BleveType() string | |||
} | |||
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags) | |||
// SetLog sets the logger used for logging |
@@ -0,0 +1,43 @@ | |||
package numeric | |||
var interleaveMagic = []uint64{ | |||
0x5555555555555555, | |||
0x3333333333333333, | |||
0x0F0F0F0F0F0F0F0F, | |||
0x00FF00FF00FF00FF, | |||
0x0000FFFF0000FFFF, | |||
0x00000000FFFFFFFF, | |||
0xAAAAAAAAAAAAAAAA, | |||
} | |||
var interleaveShift = []uint{1, 2, 4, 8, 16} | |||
// Interleave the first 32 bits of each uint64 | |||
// apdated from org.apache.lucene.util.BitUtil | |||
// whcih was adapted from: | |||
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN | |||
func Interleave(v1, v2 uint64) uint64 { | |||
v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4] | |||
v1 = (v1 | (v1 << interleaveShift[3])) & interleaveMagic[3] | |||
v1 = (v1 | (v1 << interleaveShift[2])) & interleaveMagic[2] | |||
v1 = (v1 | (v1 << interleaveShift[1])) & interleaveMagic[1] | |||
v1 = (v1 | (v1 << interleaveShift[0])) & interleaveMagic[0] | |||
v2 = (v2 | (v2 << interleaveShift[4])) & interleaveMagic[4] | |||
v2 = (v2 | (v2 << interleaveShift[3])) & interleaveMagic[3] | |||
v2 = (v2 | (v2 << interleaveShift[2])) & interleaveMagic[2] | |||
v2 = (v2 | (v2 << interleaveShift[1])) & interleaveMagic[1] | |||
v2 = (v2 | (v2 << interleaveShift[0])) & interleaveMagic[0] | |||
return (v2 << 1) | v1 | |||
} | |||
// Deinterleave the 32-bit value starting at position 0 | |||
// to get the other 32-bit value, shift it by 1 first | |||
func Deinterleave(b uint64) uint64 { | |||
b &= interleaveMagic[0] | |||
b = (b ^ (b >> interleaveShift[0])) & interleaveMagic[1] | |||
b = (b ^ (b >> interleaveShift[1])) & interleaveMagic[2] | |||
b = (b ^ (b >> interleaveShift[2])) & interleaveMagic[3] | |||
b = (b ^ (b >> interleaveShift[3])) & interleaveMagic[4] | |||
b = (b ^ (b >> interleaveShift[4])) & interleaveMagic[5] | |||
return b | |||
} |
@@ -139,6 +139,23 @@ func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive | |||
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive) | |||
} | |||
// NewTermRangeQuery creates a new Query for ranges | |||
// of text terms. | |||
// Either, but not both endpoints can be "". | |||
// The minimum value is inclusive. | |||
// The maximum value is exclusive. | |||
func NewTermRangeQuery(min, max string) *query.TermRangeQuery { | |||
return query.NewTermRangeQuery(min, max) | |||
} | |||
// NewTermRangeInclusiveQuery creates a new Query for ranges | |||
// of text terms. | |||
// Either, but not both endpoints can be "". | |||
// Control endpoint inclusion with inclusiveMin, inclusiveMax. | |||
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *query.TermRangeQuery { | |||
return query.NewTermRangeInclusiveQuery(min, max, minInclusive, maxInclusive) | |||
} | |||
// NewPhraseQuery creates a new Query for finding | |||
// exact term phrases in the index. | |||
// The provided terms must exist in the correct | |||
@@ -184,3 +201,18 @@ func NewTermQuery(term string) *query.TermQuery { | |||
func NewWildcardQuery(wildcard string) *query.WildcardQuery { | |||
return query.NewWildcardQuery(wildcard) | |||
} | |||
// NewGeoBoundingBoxQuery creates a new Query for performing geo bounding | |||
// box searches. The arguments describe the position of the box and documents | |||
// which have an indexed geo point inside the box will be returned. | |||
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *query.GeoBoundingBoxQuery { | |||
return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat) | |||
} | |||
// NewGeoDistanceQuery creates a new Query for performing geo bounding | |||
// box searches. The arguments describe a position and a distance. Documents | |||
// which have an indexed geo point which is less than or equal to the provided | |||
// distance from the given position will be returned. | |||
func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery { | |||
return query.NewGeoDistanceQuery(lon, lat, distance) | |||
} |
@@ -20,10 +20,16 @@ import ( | |||
"time" | |||
"github.com/blevesearch/bleve/analysis" | |||
"github.com/blevesearch/bleve/analysis/datetime/optional" | |||
"github.com/blevesearch/bleve/registry" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/query" | |||
) | |||
var cache = registry.NewCache() | |||
const defaultDateTimeParser = optional.Name | |||
type numericRange struct { | |||
Name string `json:"name,omitempty"` | |||
Min *float64 `json:"min,omitempty"` | |||
@@ -105,26 +111,41 @@ type FacetRequest struct { | |||
} | |||
func (fr *FacetRequest) Validate() error { | |||
if len(fr.NumericRanges) > 0 && len(fr.DateTimeRanges) > 0 { | |||
nrCount := len(fr.NumericRanges) | |||
drCount := len(fr.DateTimeRanges) | |||
if nrCount > 0 && drCount > 0 { | |||
return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both") | |||
} | |||
nrNames := map[string]interface{}{} | |||
for _, nr := range fr.NumericRanges { | |||
if _, ok := nrNames[nr.Name]; ok { | |||
return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name) | |||
if nrCount > 0 { | |||
nrNames := map[string]interface{}{} | |||
for _, nr := range fr.NumericRanges { | |||
if _, ok := nrNames[nr.Name]; ok { | |||
return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name) | |||
} | |||
nrNames[nr.Name] = struct{}{} | |||
if nr.Min == nil && nr.Max == nil { | |||
return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name) | |||
} | |||
} | |||
nrNames[nr.Name] = struct{}{} | |||
} | |||
drNames := map[string]interface{}{} | |||
for _, dr := range fr.DateTimeRanges { | |||
if _, ok := drNames[dr.Name]; ok { | |||
return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name) | |||
} else { | |||
dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser) | |||
if err != nil { | |||
return err | |||
} | |||
drNames := map[string]interface{}{} | |||
for _, dr := range fr.DateTimeRanges { | |||
if _, ok := drNames[dr.Name]; ok { | |||
return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name) | |||
} | |||
drNames[dr.Name] = struct{}{} | |||
start, end := dr.ParseDates(dateTimeParser) | |||
if start.IsZero() && end.IsZero() { | |||
return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name) | |||
} | |||
} | |||
drNames[dr.Name] = struct{}{} | |||
} | |||
return nil | |||
} | |||
@@ -149,6 +170,16 @@ func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) { | |||
fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end}) | |||
} | |||
// AddDateTimeRangeString adds a bucket to a field | |||
// containing date values. | |||
func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) { | |||
if fr.DateTimeRanges == nil { | |||
fr.DateTimeRanges = make([]*dateTimeRange, 0, 1) | |||
} | |||
fr.DateTimeRanges = append(fr.DateTimeRanges, | |||
&dateTimeRange{Name: name, startString: start, endString: end}) | |||
} | |||
// AddNumericRange adds a bucket to a field | |||
// containing numeric values. Documents with a | |||
// numeric value falling into this range are | |||
@@ -219,14 +250,15 @@ func (h *HighlightRequest) AddField(field string) { | |||
// | |||
// A special field named "*" can be used to return all fields. | |||
type SearchRequest struct { | |||
Query query.Query `json:"query"` | |||
Size int `json:"size"` | |||
From int `json:"from"` | |||
Highlight *HighlightRequest `json:"highlight"` | |||
Fields []string `json:"fields"` | |||
Facets FacetsRequest `json:"facets"` | |||
Explain bool `json:"explain"` | |||
Sort search.SortOrder `json:"sort"` | |||
Query query.Query `json:"query"` | |||
Size int `json:"size"` | |||
From int `json:"from"` | |||
Highlight *HighlightRequest `json:"highlight"` | |||
Fields []string `json:"fields"` | |||
Facets FacetsRequest `json:"facets"` | |||
Explain bool `json:"explain"` | |||
Sort search.SortOrder `json:"sort"` | |||
IncludeLocations bool `json:"includeLocations"` | |||
} | |||
func (r *SearchRequest) Validate() error { | |||
@@ -267,14 +299,15 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) { | |||
// a SearchRequest | |||
func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
var temp struct { | |||
Q json.RawMessage `json:"query"` | |||
Size *int `json:"size"` | |||
From int `json:"from"` | |||
Highlight *HighlightRequest `json:"highlight"` | |||
Fields []string `json:"fields"` | |||
Facets FacetsRequest `json:"facets"` | |||
Explain bool `json:"explain"` | |||
Sort []json.RawMessage `json:"sort"` | |||
Q json.RawMessage `json:"query"` | |||
Size *int `json:"size"` | |||
From int `json:"from"` | |||
Highlight *HighlightRequest `json:"highlight"` | |||
Fields []string `json:"fields"` | |||
Facets FacetsRequest `json:"facets"` | |||
Explain bool `json:"explain"` | |||
Sort []json.RawMessage `json:"sort"` | |||
IncludeLocations bool `json:"includeLocations"` | |||
} | |||
err := json.Unmarshal(input, &temp) | |||
@@ -300,6 +333,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||
r.Highlight = temp.Highlight | |||
r.Fields = temp.Fields | |||
r.Facets = temp.Facets | |||
r.IncludeLocations = temp.IncludeLocations | |||
r.Query, err = query.ParseQuery(temp.Q) | |||
if err != nil { | |||
return err |
@@ -34,11 +34,20 @@ func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap { | |||
return rv | |||
} | |||
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) { | |||
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch, | |||
size int) *search.DocumentMatch { | |||
c.add(doc) | |||
if c.Len() > size { | |||
return c.removeLast() | |||
} | |||
return nil | |||
} | |||
func (c *collectStoreHeap) add(doc *search.DocumentMatch) { | |||
heap.Push(c, doc) | |||
} | |||
func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch { | |||
func (c *collectStoreHeap) removeLast() *search.DocumentMatch { | |||
return heap.Pop(c).(*search.DocumentMatch) | |||
} | |||
@@ -49,17 +58,12 @@ func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.Documen | |||
return make(search.DocumentMatchCollection, 0), nil | |||
} | |||
rv := make(search.DocumentMatchCollection, size) | |||
for count > 0 { | |||
count-- | |||
if count >= skip { | |||
size-- | |||
doc := heap.Pop(c).(*search.DocumentMatch) | |||
rv[size] = doc | |||
err := fixup(doc) | |||
if err != nil { | |||
return nil, err | |||
} | |||
for i := size - 1; i >= 0; i-- { | |||
doc := heap.Pop(c).(*search.DocumentMatch) | |||
rv[i] = doc | |||
err := fixup(doc) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
return rv, nil |
@@ -34,7 +34,16 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList { | |||
return rv | |||
} | |||
func (c *collectStoreList) Add(doc *search.DocumentMatch) { | |||
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, | |||
size int) *search.DocumentMatch { | |||
c.add(doc) | |||
if c.len() > size { | |||
return c.removeLast() | |||
} | |||
return nil | |||
} | |||
func (c *collectStoreList) add(doc *search.DocumentMatch) { | |||
for e := c.results.Front(); e != nil; e = e.Next() { | |||
curr := e.Value.(*search.DocumentMatch) | |||
if c.compare(doc, curr) >= 0 { | |||
@@ -46,7 +55,7 @@ func (c *collectStoreList) Add(doc *search.DocumentMatch) { | |||
c.results.PushBack(doc) | |||
} | |||
func (c *collectStoreList) RemoveLast() *search.DocumentMatch { | |||
func (c *collectStoreList) removeLast() *search.DocumentMatch { | |||
return c.results.Remove(c.results.Front()).(*search.DocumentMatch) | |||
} | |||
@@ -73,6 +82,6 @@ func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.Documen | |||
return search.DocumentMatchCollection{}, nil | |||
} | |||
func (c *collectStoreList) Len() int { | |||
func (c *collectStoreList) len() int { | |||
return c.results.Len() | |||
} |
@@ -29,7 +29,16 @@ func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice { | |||
return rv | |||
} | |||
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { | |||
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch, | |||
size int) *search.DocumentMatch { | |||
c.add(doc) | |||
if c.len() > size { | |||
return c.removeLast() | |||
} | |||
return nil | |||
} | |||
func (c *collectStoreSlice) add(doc *search.DocumentMatch) { | |||
// find where to insert, starting at end (lowest) | |||
i := len(c.slice) | |||
for ; i > 0; i-- { | |||
@@ -44,7 +53,7 @@ func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { | |||
c.slice[i] = doc | |||
} | |||
func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch { | |||
func (c *collectStoreSlice) removeLast() *search.DocumentMatch { | |||
var rv *search.DocumentMatch | |||
rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1] | |||
return rv | |||
@@ -63,6 +72,6 @@ func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.Docume | |||
return search.DocumentMatchCollection{}, nil | |||
} | |||
func (c *collectStoreSlice) Len() int { | |||
func (c *collectStoreSlice) len() int { | |||
return len(c.slice) | |||
} |
@@ -22,6 +22,15 @@ import ( | |||
"golang.org/x/net/context" | |||
) | |||
type collectorStore interface { | |||
// Add the document, and if the new store size exceeds the provided size | |||
// the last element is removed and returned. If the size has not been | |||
// exceeded, nil is returned. | |||
AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch | |||
Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) | |||
} | |||
// PreAllocSizeSkipCap will cap preallocation to this amount when | |||
// size+skip exceeds this value | |||
var PreAllocSizeSkipCap = 1000 | |||
@@ -41,7 +50,7 @@ type TopNCollector struct { | |||
results search.DocumentMatchCollection | |||
facetsBuilder *search.FacetsBuilder | |||
store *collectStoreSlice | |||
store collectorStore | |||
needDocIds bool | |||
neededFields []string | |||
@@ -68,9 +77,15 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector | |||
backingSize = PreAllocSizeSkipCap + 1 | |||
} | |||
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int { | |||
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) | |||
}) | |||
if size+skip > 10 { | |||
hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int { | |||
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) | |||
}) | |||
} else { | |||
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int { | |||
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) | |||
}) | |||
} | |||
// these lookups traverse an interface, so do once up-front | |||
if sort.RequiresDocID() { | |||
@@ -114,12 +129,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||
default: | |||
} | |||
} | |||
if hc.facetsBuilder != nil { | |||
err = hc.facetsBuilder.Update(next) | |||
if err != nil { | |||
break | |||
} | |||
} | |||
err = hc.collectSingle(searchContext, reader, next) | |||
if err != nil { | |||
@@ -144,6 +153,16 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||
var sortByScoreOpt = []string{"_score"} | |||
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error { | |||
var err error | |||
// visit field terms for features that require it (sort, facets) | |||
if len(hc.neededFields) > 0 { | |||
err = hc.visitFieldTerms(reader, d) | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
// increment total hits | |||
hc.total++ | |||
d.HitNumber = hc.total | |||
@@ -153,7 +172,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||
hc.maxScore = d.Score | |||
} | |||
var err error | |||
// see if we need to load ID (at this early stage, for example to sort on it) | |||
if hc.needDocIds { | |||
d.ID, err = reader.ExternalID(d.IndexInternalID) | |||
@@ -162,22 +180,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||
} | |||
} | |||
// see if we need to load the stored fields | |||
if len(hc.neededFields) > 0 { | |||
// find out which fields haven't been loaded yet | |||
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields) | |||
// look them up | |||
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad) | |||
if err != nil { | |||
return err | |||
} | |||
// cache these as well | |||
if d.CachedFieldTerms == nil { | |||
d.CachedFieldTerms = make(map[string][]string) | |||
} | |||
d.CachedFieldTerms.Merge(fieldTerms) | |||
} | |||
// compute this hits sort value | |||
if len(hc.sort) == 1 && hc.cachedScoring[0] { | |||
d.Sort = sortByScoreOpt | |||
@@ -197,9 +199,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||
} | |||
} | |||
hc.store.Add(d) | |||
if hc.store.Len() > hc.size+hc.skip { | |||
removed := hc.store.RemoveLast() | |||
removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip) | |||
if removed != nil { | |||
if hc.lowestMatchOutsideResults == nil { | |||
hc.lowestMatchOutsideResults = removed | |||
} else { | |||
@@ -215,9 +216,31 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||
return nil | |||
} | |||
// visitFieldTerms is responsible for visiting the field terms of the | |||
// search hit, and passing visited terms to the sort and facet builder | |||
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error { | |||
if hc.facetsBuilder != nil { | |||
hc.facetsBuilder.StartDoc() | |||
} | |||
err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) { | |||
if hc.facetsBuilder != nil { | |||
hc.facetsBuilder.UpdateVisitor(field, term) | |||
} | |||
hc.sort.UpdateVisitor(field, term) | |||
}) | |||
if hc.facetsBuilder != nil { | |||
hc.facetsBuilder.EndDoc() | |||
} | |||
return err | |||
} | |||
// SetFacetsBuilder registers a facet builder for this collector | |||
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { | |||
hc.facetsBuilder = facetsBuilder | |||
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...) | |||
} | |||
// finalizeResults starts with the heap containing the final top size+skip |
@@ -18,7 +18,6 @@ import ( | |||
"sort" | |||
"time" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/numeric" | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
@@ -35,6 +34,7 @@ type DateTimeFacetBuilder struct { | |||
total int | |||
missing int | |||
ranges map[string]*dateTimeRange | |||
sawValue bool | |||
} | |||
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder { | |||
@@ -58,36 +58,35 @@ func (fb *DateTimeFacetBuilder) Field() string { | |||
return fb.field | |||
} | |||
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) { | |||
terms, ok := ft[fb.field] | |||
if ok { | |||
for _, term := range terms { | |||
// only consider the values which are shifted 0 | |||
prefixCoded := numeric.PrefixCoded(term) | |||
shift, err := prefixCoded.Shift() | |||
if err == nil && shift == 0 { | |||
i64, err := prefixCoded.Int64() | |||
if err == nil { | |||
t := time.Unix(0, i64) | |||
// look at each of the ranges for a match | |||
for rangeName, r := range fb.ranges { | |||
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) { | |||
existingCount, existed := fb.termsCount[rangeName] | |||
if existed { | |||
fb.termsCount[rangeName] = existingCount + 1 | |||
} else { | |||
fb.termsCount[rangeName] = 1 | |||
} | |||
fb.total++ | |||
} | |||
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) { | |||
if field == fb.field { | |||
fb.sawValue = true | |||
// only consider the values which are shifted 0 | |||
prefixCoded := numeric.PrefixCoded(term) | |||
shift, err := prefixCoded.Shift() | |||
if err == nil && shift == 0 { | |||
i64, err := prefixCoded.Int64() | |||
if err == nil { | |||
t := time.Unix(0, i64) | |||
// look at each of the ranges for a match | |||
for rangeName, r := range fb.ranges { | |||
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) { | |||
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1 | |||
fb.total++ | |||
} | |||
} | |||
} | |||
} | |||
} else { | |||
} | |||
} | |||
func (fb *DateTimeFacetBuilder) StartDoc() { | |||
fb.sawValue = false | |||
} | |||
func (fb *DateTimeFacetBuilder) EndDoc() { | |||
if !fb.sawValue { | |||
fb.missing++ | |||
} | |||
} |
@@ -17,7 +17,6 @@ package facet | |||
import ( | |||
"sort" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/numeric" | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
@@ -34,6 +33,7 @@ type NumericFacetBuilder struct { | |||
total int | |||
missing int | |||
ranges map[string]*numericRange | |||
sawValue bool | |||
} | |||
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder { | |||
@@ -57,36 +57,35 @@ func (fb *NumericFacetBuilder) Field() string { | |||
return fb.field | |||
} | |||
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) { | |||
terms, ok := ft[fb.field] | |||
if ok { | |||
for _, term := range terms { | |||
// only consider the values which are shifted 0 | |||
prefixCoded := numeric.PrefixCoded(term) | |||
shift, err := prefixCoded.Shift() | |||
if err == nil && shift == 0 { | |||
i64, err := prefixCoded.Int64() | |||
if err == nil { | |||
f64 := numeric.Int64ToFloat64(i64) | |||
// look at each of the ranges for a match | |||
for rangeName, r := range fb.ranges { | |||
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) { | |||
existingCount, existed := fb.termsCount[rangeName] | |||
if existed { | |||
fb.termsCount[rangeName] = existingCount + 1 | |||
} else { | |||
fb.termsCount[rangeName] = 1 | |||
} | |||
fb.total++ | |||
} | |||
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) { | |||
if field == fb.field { | |||
fb.sawValue = true | |||
// only consider the values which are shifted 0 | |||
prefixCoded := numeric.PrefixCoded(term) | |||
shift, err := prefixCoded.Shift() | |||
if err == nil && shift == 0 { | |||
i64, err := prefixCoded.Int64() | |||
if err == nil { | |||
f64 := numeric.Int64ToFloat64(i64) | |||
// look at each of the ranges for a match | |||
for rangeName, r := range fb.ranges { | |||
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) { | |||
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1 | |||
fb.total++ | |||
} | |||
} | |||
} | |||
} | |||
} else { | |||
} | |||
} | |||
func (fb *NumericFacetBuilder) StartDoc() { | |||
fb.sawValue = false | |||
} | |||
func (fb *NumericFacetBuilder) EndDoc() { | |||
if !fb.sawValue { | |||
fb.missing++ | |||
} | |||
} |
@@ -17,7 +17,6 @@ package facet | |||
import ( | |||
"sort" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
@@ -27,6 +26,7 @@ type TermsFacetBuilder struct { | |||
termsCount map[string]int | |||
total int | |||
missing int | |||
sawValue bool | |||
} | |||
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder { | |||
@@ -41,19 +41,20 @@ func (fb *TermsFacetBuilder) Field() string { | |||
return fb.field | |||
} | |||
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) { | |||
terms, ok := ft[fb.field] | |||
if ok { | |||
for _, term := range terms { | |||
existingCount, existed := fb.termsCount[term] | |||
if existed { | |||
fb.termsCount[term] = existingCount + 1 | |||
} else { | |||
fb.termsCount[term] = 1 | |||
} | |||
fb.total++ | |||
} | |||
} else { | |||
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) { | |||
if field == fb.field { | |||
fb.sawValue = true | |||
fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1 | |||
fb.total++ | |||
} | |||
} | |||
func (fb *TermsFacetBuilder) StartDoc() { | |||
fb.sawValue = false | |||
} | |||
func (fb *TermsFacetBuilder) EndDoc() { | |||
if !fb.sawValue { | |||
fb.missing++ | |||
} | |||
} |
@@ -21,7 +21,10 @@ import ( | |||
) | |||
type FacetBuilder interface { | |||
Update(index.FieldTerms) | |||
StartDoc() | |||
UpdateVisitor(field string, term []byte) | |||
EndDoc() | |||
Result() *FacetResult | |||
Field() string | |||
} | |||
@@ -41,33 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { | |||
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { | |||
fb.facets[name] = facetBuilder | |||
fb.fields = append(fb.fields, facetBuilder.Field()) | |||
} | |||
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { | |||
if fb.fields == nil { | |||
for _, facetBuilder := range fb.facets { | |||
fb.fields = append(fb.fields, facetBuilder.Field()) | |||
} | |||
func (fb *FacetsBuilder) RequiredFields() []string { | |||
return fb.fields | |||
} | |||
func (fb *FacetsBuilder) StartDoc() { | |||
for _, facetBuilder := range fb.facets { | |||
facetBuilder.StartDoc() | |||
} | |||
} | |||
if len(fb.fields) > 0 { | |||
// find out which fields haven't been loaded yet | |||
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fb.fields) | |||
// look them up | |||
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad) | |||
if err != nil { | |||
return err | |||
} | |||
// cache these as well | |||
if docMatch.CachedFieldTerms == nil { | |||
docMatch.CachedFieldTerms = make(map[string][]string) | |||
} | |||
docMatch.CachedFieldTerms.Merge(fieldTerms) | |||
func (fb *FacetsBuilder) EndDoc() { | |||
for _, facetBuilder := range fb.facets { | |||
facetBuilder.EndDoc() | |||
} | |||
} | |||
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) { | |||
for _, facetBuilder := range fb.facets { | |||
facetBuilder.Update(docMatch.CachedFieldTerms) | |||
facetBuilder.UpdateVisitor(field, term) | |||
} | |||
return nil | |||
} | |||
type TermFacet struct { |
@@ -44,7 +44,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h | |||
continue | |||
} | |||
// make sure the array positions match | |||
if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { | |||
if !termLocation.ArrayPositions.Equals(f.ArrayPositions) { | |||
continue | |||
} | |||
if termLocation.Start < curr { |
@@ -37,7 +37,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) { | |||
OUTER: | |||
for _, locations := range s.tlm { | |||
for _, location := range locations { | |||
if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { | |||
if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { | |||
score += 1.0 | |||
// once we find a term in the fragment | |||
// don't care about additional matches |
@@ -87,7 +87,7 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume | |||
if ok { | |||
termLocationsSameArrayPosition := make(highlight.TermLocations, 0) | |||
for _, otl := range orderedTermLocations { | |||
if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) { | |||
if otl.ArrayPositions.Equals(f.ArrayPositions()) { | |||
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl) | |||
} | |||
} |
@@ -23,7 +23,7 @@ import ( | |||
type TermLocation struct { | |||
Term string | |||
ArrayPositions []float64 | |||
ArrayPositions search.ArrayPositions | |||
Pos int | |||
Start int | |||
End int | |||
@@ -103,15 +103,3 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations { | |||
sort.Sort(rv) | |||
return rv | |||
} | |||
func SameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool { | |||
if len(fieldArrayPositions) != len(termLocationArrayPositions) { | |||
return false | |||
} | |||
for i := 0; i < len(fieldArrayPositions); i++ { | |||
if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) { | |||
return false | |||
} | |||
} | |||
return true | |||
} |
@@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch { | |||
// pre-allocated to accommodate the requested number of DocumentMatch | |||
// instances | |||
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool { | |||
avail := make(DocumentMatchCollection, 0, size) | |||
avail := make(DocumentMatchCollection, size) | |||
// pre-allocate the expected number of instances | |||
startBlock := make([]DocumentMatch, size) | |||
startSorts := make([]string, size*sortsize) | |||
// make these initial instances available | |||
for i := range startBlock { | |||
startBlock[i].Sort = make([]string, 0, sortsize) | |||
avail = append(avail, &startBlock[i]) | |||
i, j := 0, 0 | |||
for i < size { | |||
avail[i] = &startBlock[i] | |||
avail[i].Sort = startSorts[j:j] | |||
i += 1 | |||
j += sortsize | |||
} | |||
return &DocumentMatchPool{ | |||
avail: avail, |
@@ -22,7 +22,7 @@ import ( | |||
) | |||
type BoolFieldQuery struct { | |||
Bool bool `json:"bool"` | |||
Bool bool `json:"bool"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
@@ -39,20 +39,19 @@ func (q *BoolFieldQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *BoolFieldQuery) Boost() float64{ | |||
return q.BoostVal.Value() | |||
func (q *BoolFieldQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *BoolFieldQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *BoolFieldQuery) Field() string{ | |||
func (q *BoolFieldQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
@@ -61,5 +60,5 @@ func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, e | |||
if q.Bool { | |||
term = "T" | |||
} | |||
return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), explain) | |||
return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), options) | |||
} |
@@ -25,10 +25,11 @@ import ( | |||
) | |||
type BooleanQuery struct { | |||
Must Query `json:"must,omitempty"` | |||
Should Query `json:"should,omitempty"` | |||
MustNot Query `json:"must_not,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
Must Query `json:"must,omitempty"` | |||
Should Query `json:"should,omitempty"` | |||
MustNot Query `json:"must_not,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
queryStringMode bool | |||
} | |||
// NewBooleanQuery creates a compound Query composed | |||
@@ -55,6 +56,15 @@ func NewBooleanQuery(must []Query, should []Query, mustNot []Query) *BooleanQuer | |||
return &rv | |||
} | |||
func NewBooleanQueryForQueryString(must []Query, should []Query, mustNot []Query) *BooleanQuery { | |||
rv := NewBooleanQuery(nil, nil, nil) | |||
rv.queryStringMode = true | |||
rv.AddMust(must...) | |||
rv.AddShould(should...) | |||
rv.AddMustNot(mustNot...) | |||
return rv | |||
} | |||
// SetMinShould requires that at least minShould of the | |||
// should Queries must be satisfied. | |||
func (q *BooleanQuery) SetMinShould(minShould float64) { | |||
@@ -63,7 +73,9 @@ func (q *BooleanQuery) SetMinShould(minShould float64) { | |||
func (q *BooleanQuery) AddMust(m ...Query) { | |||
if q.Must == nil { | |||
q.Must = NewConjunctionQuery([]Query{}) | |||
tmp := NewConjunctionQuery([]Query{}) | |||
tmp.queryStringMode = q.queryStringMode | |||
q.Must = tmp | |||
} | |||
for _, mq := range m { | |||
q.Must.(*ConjunctionQuery).AddQuery(mq) | |||
@@ -72,7 +84,9 @@ func (q *BooleanQuery) AddMust(m ...Query) { | |||
func (q *BooleanQuery) AddShould(m ...Query) { | |||
if q.Should == nil { | |||
q.Should = NewDisjunctionQuery([]Query{}) | |||
tmp := NewDisjunctionQuery([]Query{}) | |||
tmp.queryStringMode = q.queryStringMode | |||
q.Should = tmp | |||
} | |||
for _, mq := range m { | |||
q.Should.(*DisjunctionQuery).AddQuery(mq) | |||
@@ -81,7 +95,9 @@ func (q *BooleanQuery) AddShould(m ...Query) { | |||
func (q *BooleanQuery) AddMustNot(m ...Query) { | |||
if q.MustNot == nil { | |||
q.MustNot = NewDisjunctionQuery([]Query{}) | |||
tmp := NewDisjunctionQuery([]Query{}) | |||
tmp.queryStringMode = q.queryStringMode | |||
q.MustNot = tmp | |||
} | |||
for _, mq := range m { | |||
q.MustNot.(*DisjunctionQuery).AddQuery(mq) | |||
@@ -93,44 +109,67 @@ func (q *BooleanQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *BooleanQuery) Boost() float64{ | |||
func (q *BooleanQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
var err error | |||
var mustNotSearcher search.Searcher | |||
if q.MustNot != nil { | |||
mustNotSearcher, err = q.MustNot.Searcher(i, m, explain) | |||
mustNotSearcher, err = q.MustNot.Searcher(i, m, options) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if q.Must == nil && q.Should == nil { | |||
q.Must = NewMatchAllQuery() | |||
// if must not is MatchNone, reset it to nil | |||
if _, ok := mustNotSearcher.(*searcher.MatchNoneSearcher); ok { | |||
mustNotSearcher = nil | |||
} | |||
} | |||
var mustSearcher search.Searcher | |||
if q.Must != nil { | |||
mustSearcher, err = q.Must.Searcher(i, m, explain) | |||
mustSearcher, err = q.Must.Searcher(i, m, options) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// if must searcher is MatchNone, reset it to nil | |||
if _, ok := mustSearcher.(*searcher.MatchNoneSearcher); ok { | |||
mustSearcher = nil | |||
} | |||
} | |||
var shouldSearcher search.Searcher | |||
if q.Should != nil { | |||
shouldSearcher, err = q.Should.Searcher(i, m, explain) | |||
shouldSearcher, err = q.Should.Searcher(i, m, options) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// if should searcher is MatchNone, reset it to nil | |||
if _, ok := shouldSearcher.(*searcher.MatchNoneSearcher); ok { | |||
shouldSearcher = nil | |||
} | |||
} | |||
// if all 3 are nil, return MatchNone | |||
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil { | |||
return searcher.NewMatchNoneSearcher(i) | |||
} | |||
// if only mustNotSearcher, start with MatchAll | |||
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil { | |||
mustSearcher, err = searcher.NewMatchAllSearcher(i, 1.0, options) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
// optimization, if only should searcher, just return it instead | |||
if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil { | |||
return shouldSearcher, nil | |||
} | |||
return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, explain) | |||
return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, options) | |||
} | |||
func (q *BooleanQuery) Validate() error { |
@@ -24,8 +24,9 @@ import ( | |||
) | |||
type ConjunctionQuery struct { | |||
Conjuncts []Query `json:"conjuncts"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
Conjuncts []Query `json:"conjuncts"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
queryStringMode bool | |||
} | |||
// NewConjunctionQuery creates a new compound Query. | |||
@@ -41,7 +42,7 @@ func (q *ConjunctionQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *ConjunctionQuery) Boost() float64{ | |||
func (q *ConjunctionQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -51,11 +52,10 @@ func (q *ConjunctionQuery) AddQuery(aq ...Query) { | |||
} | |||
} | |||
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
ss := make([]search.Searcher, len(q.Conjuncts)) | |||
for in, conjunct := range q.Conjuncts { | |||
var err error | |||
ss[in], err = conjunct.Searcher(i, m, explain) | |||
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
ss := make([]search.Searcher, 0, len(q.Conjuncts)) | |||
for _, conjunct := range q.Conjuncts { | |||
sr, err := conjunct.Searcher(i, m, options) | |||
if err != nil { | |||
for _, searcher := range ss { | |||
if searcher != nil { | |||
@@ -64,8 +64,16 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
} | |||
return nil, err | |||
} | |||
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode { | |||
// in query string mode, skip match none | |||
continue | |||
} | |||
ss = append(ss, sr) | |||
} | |||
if len(ss) < 1 { | |||
return searcher.NewMatchNoneSearcher(i) | |||
} | |||
return searcher.NewConjunctionSearcher(i, ss, explain) | |||
return searcher.NewConjunctionSearcher(i, ss, options) | |||
} | |||
func (q *ConjunctionQuery) Validate() error { |
@@ -113,20 +113,19 @@ func (q *DateRangeQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *DateRangeQuery) Boost() float64{ | |||
func (q *DateRangeQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *DateRangeQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *DateRangeQuery) Field() string{ | |||
func (q *DateRangeQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
min, max, err := q.parseEndpoints() | |||
if err != nil { | |||
return nil, err | |||
@@ -137,7 +136,7 @@ func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, e | |||
field = m.DefaultSearchField() | |||
} | |||
return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), explain) | |||
return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { |
@@ -25,9 +25,10 @@ import ( | |||
) | |||
type DisjunctionQuery struct { | |||
Disjuncts []Query `json:"disjuncts"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
Min float64 `json:"min"` | |||
Disjuncts []Query `json:"disjuncts"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
Min float64 `json:"min"` | |||
queryStringMode bool | |||
} | |||
// NewDisjunctionQuery creates a new compound Query. | |||
@@ -43,11 +44,10 @@ func (q *DisjunctionQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *DisjunctionQuery) Boost() float64{ | |||
func (q *DisjunctionQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *DisjunctionQuery) AddQuery(aq ...Query) { | |||
for _, aaq := range aq { | |||
q.Disjuncts = append(q.Disjuncts, aaq) | |||
@@ -58,11 +58,10 @@ func (q *DisjunctionQuery) SetMin(m float64) { | |||
q.Min = m | |||
} | |||
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
ss := make([]search.Searcher, len(q.Disjuncts)) | |||
for in, disjunct := range q.Disjuncts { | |||
var err error | |||
ss[in], err = disjunct.Searcher(i, m, explain) | |||
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
ss := make([]search.Searcher, 0, len(q.Disjuncts)) | |||
for _, disjunct := range q.Disjuncts { | |||
sr, err := disjunct.Searcher(i, m, options) | |||
if err != nil { | |||
for _, searcher := range ss { | |||
if searcher != nil { | |||
@@ -71,8 +70,16 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
} | |||
return nil, err | |||
} | |||
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode { | |||
// in query string mode, skip match none | |||
continue | |||
} | |||
ss = append(ss, sr) | |||
} | |||
if len(ss) < 1 { | |||
return searcher.NewMatchNoneSearcher(i) | |||
} | |||
return searcher.NewDisjunctionSearcher(i, ss, q.Min, explain) | |||
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) | |||
} | |||
func (q *DisjunctionQuery) Validate() error { |
@@ -40,10 +40,10 @@ func (q *DocIDQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *DocIDQuery) Boost() float64{ | |||
func (q *DocIDQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), explain) | |||
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), options) | |||
} |
@@ -48,7 +48,7 @@ func (q *FuzzyQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *FuzzyQuery) Boost() float64{ | |||
func (q *FuzzyQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -56,7 +56,7 @@ func (q *FuzzyQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *FuzzyQuery) Field() string{ | |||
func (q *FuzzyQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
@@ -68,10 +68,10 @@ func (q *FuzzyQuery) SetPrefix(p int) { | |||
q.Prefix = p | |||
} | |||
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), explain) | |||
return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), options) | |||
} |
@@ -0,0 +1,113 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package query | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"github.com/blevesearch/bleve/geo" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/searcher" | |||
) | |||
type GeoBoundingBoxQuery struct { | |||
TopLeft []float64 `json:"top_left,omitempty"` | |||
BottomRight []float64 `json:"bottom_right,omitempty"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery { | |||
return &GeoBoundingBoxQuery{ | |||
TopLeft: []float64{topLeftLon, topLeftLat}, | |||
BottomRight: []float64{bottomRightLon, bottomRightLat}, | |||
} | |||
} | |||
func (q *GeoBoundingBoxQuery) SetBoost(b float64) { | |||
boost := Boost(b) | |||
q.BoostVal = &boost | |||
} | |||
func (q *GeoBoundingBoxQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *GeoBoundingBoxQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *GeoBoundingBoxQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
if q.BottomRight[0] < q.TopLeft[0] { | |||
// cross date line, rewrite as two parts | |||
leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true) | |||
if err != nil { | |||
return nil, err | |||
} | |||
rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], 180, q.TopLeft[1], field, q.BoostVal.Value(), options, true) | |||
if err != nil { | |||
_ = leftSearcher.Close() | |||
return nil, err | |||
} | |||
return searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options) | |||
} | |||
return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true) | |||
} | |||
func (q *GeoBoundingBoxQuery) Validate() error { | |||
return nil | |||
} | |||
func (q *GeoBoundingBoxQuery) UnmarshalJSON(data []byte) error { | |||
tmp := struct { | |||
TopLeft interface{} `json:"top_left,omitempty"` | |||
BottomRight interface{} `json:"bottom_right,omitempty"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
}{} | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
// now use our generic point parsing code from the geo package | |||
lon, lat, found := geo.ExtractGeoPoint(tmp.TopLeft) | |||
if !found { | |||
return fmt.Errorf("geo location top_left not in a valid format") | |||
} | |||
q.TopLeft = []float64{lon, lat} | |||
lon, lat, found = geo.ExtractGeoPoint(tmp.BottomRight) | |||
if !found { | |||
return fmt.Errorf("geo location bottom_right not in a valid format") | |||
} | |||
q.BottomRight = []float64{lon, lat} | |||
q.FieldVal = tmp.FieldVal | |||
q.BoostVal = tmp.BoostVal | |||
return nil | |||
} |
@@ -0,0 +1,100 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package query | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"github.com/blevesearch/bleve/geo" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/searcher" | |||
) | |||
type GeoDistanceQuery struct { | |||
Location []float64 `json:"location,omitempty"` | |||
Distance string `json:"distance,omitempty"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery { | |||
return &GeoDistanceQuery{ | |||
Location: []float64{lon, lat}, | |||
Distance: distance, | |||
} | |||
} | |||
func (q *GeoDistanceQuery) SetBoost(b float64) { | |||
boost := Boost(b) | |||
q.BoostVal = &boost | |||
} | |||
func (q *GeoDistanceQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *GeoDistanceQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *GeoDistanceQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
dist, err := geo.ParseDistance(q.Distance) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return searcher.NewGeoPointDistanceSearcher(i, q.Location[0], q.Location[1], | |||
dist, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *GeoDistanceQuery) Validate() error { | |||
return nil | |||
} | |||
func (q *GeoDistanceQuery) UnmarshalJSON(data []byte) error { | |||
tmp := struct { | |||
Location interface{} `json:"location,omitempty"` | |||
Distance string `json:"distance,omitempty"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
}{} | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
// now use our generic point parsing code from the geo package | |||
lon, lat, found := geo.ExtractGeoPoint(tmp.Location) | |||
if !found { | |||
return fmt.Errorf("geo location not in a valid format") | |||
} | |||
q.Location = []float64{lon, lat} | |||
q.Distance = tmp.Distance | |||
q.FieldVal = tmp.FieldVal | |||
q.BoostVal = tmp.BoostVal | |||
return nil | |||
} |
@@ -90,7 +90,7 @@ func (q *MatchQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *MatchQuery) Boost() float64{ | |||
func (q *MatchQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -98,7 +98,7 @@ func (q *MatchQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *MatchQuery) Field() string{ | |||
func (q *MatchQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
@@ -114,7 +114,7 @@ func (q *MatchQuery) SetOperator(operator MatchQueryOperator) { | |||
q.Operator = operator | |||
} | |||
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
@@ -160,17 +160,17 @@ func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, expla | |||
shouldQuery := NewDisjunctionQuery(tqs) | |||
shouldQuery.SetMin(1) | |||
shouldQuery.SetBoost(q.BoostVal.Value()) | |||
return shouldQuery.Searcher(i, m, explain) | |||
return shouldQuery.Searcher(i, m, options) | |||
case MatchQueryOperatorAnd: | |||
mustQuery := NewConjunctionQuery(tqs) | |||
mustQuery.SetBoost(q.BoostVal.Value()) | |||
return mustQuery.Searcher(i, m, explain) | |||
return mustQuery.Searcher(i, m, options) | |||
default: | |||
return nil, fmt.Errorf("unhandled operator %d", q.Operator) | |||
} | |||
} | |||
noneQuery := NewMatchNoneQuery() | |||
return noneQuery.Searcher(i, m, explain) | |||
return noneQuery.Searcher(i, m, options) | |||
} |
@@ -38,14 +38,12 @@ func (q *MatchAllQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *MatchAllQuery) Boost() float64{ | |||
func (q *MatchAllQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), explain) | |||
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), options) | |||
} | |||
func (q *MatchAllQuery) MarshalJSON() ([]byte, error) { |
@@ -38,11 +38,11 @@ func (q *MatchNoneQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *MatchNoneQuery) Boost() float64{ | |||
func (q *MatchNoneQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
return searcher.NewMatchNoneSearcher(i) | |||
} | |||
@@ -49,7 +49,7 @@ func (q *MatchPhraseQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *MatchPhraseQuery) Boost() float64{ | |||
func (q *MatchPhraseQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -57,11 +57,11 @@ func (q *MatchPhraseQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *MatchPhraseQuery) Field() string{ | |||
func (q *MatchPhraseQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
@@ -81,15 +81,15 @@ func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
tokens := analyzer.Analyze([]byte(q.MatchPhrase)) | |||
if len(tokens) > 0 { | |||
phrase := tokenStreamToPhrase(tokens) | |||
phraseQuery := NewPhraseQuery(phrase, field) | |||
phraseQuery := NewMultiPhraseQuery(phrase, field) | |||
phraseQuery.SetBoost(q.BoostVal.Value()) | |||
return phraseQuery.Searcher(i, m, explain) | |||
return phraseQuery.Searcher(i, m, options) | |||
} | |||
noneQuery := NewMatchNoneQuery() | |||
return noneQuery.Searcher(i, m, explain) | |||
return noneQuery.Searcher(i, m, options) | |||
} | |||
func tokenStreamToPhrase(tokens analysis.TokenStream) []string { | |||
func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string { | |||
firstPosition := int(^uint(0) >> 1) | |||
lastPosition := 0 | |||
for _, token := range tokens { | |||
@@ -102,13 +102,10 @@ func tokenStreamToPhrase(tokens analysis.TokenStream) []string { | |||
} | |||
phraseLen := lastPosition - firstPosition + 1 | |||
if phraseLen > 0 { | |||
rv := make([]string, phraseLen) | |||
for i := 0; i < phraseLen; i++ { | |||
rv[i] = "" | |||
} | |||
rv := make([][]string, phraseLen) | |||
for _, token := range tokens { | |||
pos := token.Position - firstPosition | |||
rv[pos] = string(token.Term) | |||
rv[pos] = append(rv[pos], string(token.Term)) | |||
} | |||
return rv | |||
} |
@@ -0,0 +1,80 @@ | |||
// Copyright (c) 2014 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package query | |||
import ( | |||
"encoding/json" | |||
"fmt" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/searcher" | |||
) | |||
type MultiPhraseQuery struct { | |||
Terms [][]string `json:"terms"` | |||
Field string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
// NewMultiPhraseQuery creates a new Query for finding | |||
// term phrases in the index. | |||
// It is like PhraseQuery, but each position in the | |||
// phrase may be satisfied by a list of terms | |||
// as opposed to just one. | |||
// At least one of the terms must exist in the correct | |||
// order, at the correct index offsets, in the | |||
// specified field. Queried field must have been indexed with | |||
// IncludeTermVectors set to true. | |||
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery { | |||
return &MultiPhraseQuery{ | |||
Terms: terms, | |||
Field: field, | |||
} | |||
} | |||
func (q *MultiPhraseQuery) SetBoost(b float64) { | |||
boost := Boost(b) | |||
q.BoostVal = &boost | |||
} | |||
func (q *MultiPhraseQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options) | |||
} | |||
func (q *MultiPhraseQuery) Validate() error { | |||
if len(q.Terms) < 1 { | |||
return fmt.Errorf("phrase query must contain at least one term") | |||
} | |||
return nil | |||
} | |||
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error { | |||
type _mphraseQuery MultiPhraseQuery | |||
tmp := _mphraseQuery{} | |||
err := json.Unmarshal(data, &tmp) | |||
if err != nil { | |||
return err | |||
} | |||
q.Terms = tmp.Terms | |||
q.Field = tmp.Field | |||
q.BoostVal = tmp.BoostVal | |||
return nil | |||
} |
@@ -59,7 +59,7 @@ func (q *NumericRangeQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *NumericRangeQuery) Boost() float64{ | |||
func (q *NumericRangeQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -67,16 +67,16 @@ func (q *NumericRangeQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *NumericRangeQuery) Field() string{ | |||
func (q *NumericRangeQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), explain) | |||
return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *NumericRangeQuery) Validate() error { |
@@ -25,10 +25,9 @@ import ( | |||
) | |||
type PhraseQuery struct { | |||
Terms []string `json:"terms"` | |||
Field string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
termQueries []Query | |||
Terms []string `json:"terms"` | |||
Field string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
// NewPhraseQuery creates a new Query for finding | |||
@@ -38,18 +37,9 @@ type PhraseQuery struct { | |||
// specified field. Queried field must have been indexed with | |||
// IncludeTermVectors set to true. | |||
func NewPhraseQuery(terms []string, field string) *PhraseQuery { | |||
termQueries := make([]Query, 0) | |||
for _, term := range terms { | |||
if term != "" { | |||
tq := NewTermQuery(term) | |||
tq.SetField(field) | |||
termQueries = append(termQueries, tq) | |||
} | |||
} | |||
return &PhraseQuery{ | |||
Terms: terms, | |||
Field: field, | |||
termQueries: termQueries, | |||
Terms: terms, | |||
Field: field, | |||
} | |||
} | |||
@@ -58,22 +48,16 @@ func (q *PhraseQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *PhraseQuery) Boost() float64{ | |||
func (q *PhraseQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
conjunctionQuery := NewConjunctionQuery(q.termQueries) | |||
conjunctionSearcher, err := conjunctionQuery.Searcher(i, m, explain) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return searcher.NewPhraseSearcher(i, conjunctionSearcher.(*searcher.ConjunctionSearcher), q.Terms) | |||
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options) | |||
} | |||
func (q *PhraseQuery) Validate() error { | |||
if len(q.termQueries) < 1 { | |||
if len(q.Terms) < 1 { | |||
return fmt.Errorf("phrase query must contain at least one term") | |||
} | |||
return nil | |||
@@ -89,9 +73,5 @@ func (q *PhraseQuery) UnmarshalJSON(data []byte) error { | |||
q.Terms = tmp.Terms | |||
q.Field = tmp.Field | |||
q.BoostVal = tmp.BoostVal | |||
q.termQueries = make([]Query, len(q.Terms)) | |||
for i, term := range q.Terms { | |||
q.termQueries[i] = &TermQuery{Term: term, FieldVal: q.Field, BoostVal: q.BoostVal} | |||
} | |||
return nil | |||
} |
@@ -41,7 +41,7 @@ func (q *PrefixQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *PrefixQuery) Boost() float64{ | |||
func (q *PrefixQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -49,14 +49,14 @@ func (q *PrefixQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *PrefixQuery) Field() string{ | |||
func (q *PrefixQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), explain) | |||
return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), options) | |||
} |
@@ -36,7 +36,8 @@ func SetLog(l *log.Logger) { | |||
// A Query represents a description of the type | |||
// and parameters for a query into the index. | |||
type Query interface { | |||
Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) | |||
Searcher(i index.IndexReader, m mapping.IndexMapping, | |||
options search.SearcherOptions) (search.Searcher, error) | |||
} | |||
// A BoostableQuery represents a Query which can be boosted | |||
@@ -122,7 +123,13 @@ func ParseQuery(input []byte) (Query, error) { | |||
var rv PhraseQuery | |||
err := json.Unmarshal(input, &rv) | |||
if err != nil { | |||
return nil, err | |||
// now try multi-phrase | |||
var rv2 MultiPhraseQuery | |||
err = json.Unmarshal(input, &rv2) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &rv2, nil | |||
} | |||
return &rv, nil | |||
} | |||
@@ -154,8 +161,8 @@ func ParseQuery(input []byte) (Query, error) { | |||
} | |||
return &rv, nil | |||
} | |||
_, hasMin := tmp["min"] | |||
_, hasMax := tmp["max"] | |||
_, hasMin := tmp["min"].(float64) | |||
_, hasMax := tmp["max"].(float64) | |||
if hasMin || hasMax { | |||
var rv NumericRangeQuery | |||
err := json.Unmarshal(input, &rv) | |||
@@ -164,6 +171,16 @@ func ParseQuery(input []byte) (Query, error) { | |||
} | |||
return &rv, nil | |||
} | |||
_, hasMinStr := tmp["min"].(string) | |||
_, hasMaxStr := tmp["max"].(string) | |||
if hasMinStr || hasMaxStr { | |||
var rv TermRangeQuery | |||
err := json.Unmarshal(input, &rv) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &rv, nil | |||
} | |||
_, hasStart := tmp["start"] | |||
_, hasEnd := tmp["end"] | |||
if hasStart || hasEnd { | |||
@@ -237,6 +254,25 @@ func ParseQuery(input []byte) (Query, error) { | |||
} | |||
return &rv, nil | |||
} | |||
_, hasTopLeft := tmp["top_left"] | |||
_, hasBottomRight := tmp["bottom_right"] | |||
if hasTopLeft && hasBottomRight { | |||
var rv GeoBoundingBoxQuery | |||
err := json.Unmarshal(input, &rv) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &rv, nil | |||
} | |||
_, hasDistance := tmp["distance"] | |||
if hasDistance { | |||
var rv GeoDistanceQuery | |||
err := json.Unmarshal(input, &rv) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &rv, nil | |||
} | |||
return nil, fmt.Errorf("unknown query type") | |||
} | |||
@@ -300,14 +336,6 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) { | |||
return nil, err | |||
} | |||
return &q, nil | |||
case *PhraseQuery: | |||
q := *query.(*PhraseQuery) | |||
children, err := expandSlice(q.termQueries) | |||
if err != nil { | |||
return nil, err | |||
} | |||
q.termQueries = children | |||
return &q, nil | |||
default: | |||
return query, nil | |||
} |
@@ -39,16 +39,20 @@ func (q *QueryStringQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *QueryStringQuery) Boost() float64{ | |||
func (q *QueryStringQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *QueryStringQuery) Parse() (Query, error) { | |||
return parseQuerySyntax(q.Query) | |||
} | |||
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
newQuery, err := parseQuerySyntax(q.Query) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return newQuery.Searcher(i, m, explain) | |||
return newQuery.Searcher(i, m, options) | |||
} | |||
func (q *QueryStringQuery) Validate() error { |
@@ -27,6 +27,7 @@ tEQUAL tTILDE | |||
%type <s> tSTRING | |||
%type <s> tPHRASE | |||
%type <s> tNUMBER | |||
%type <s> posOrNegNumber | |||
%type <s> tTILDE | |||
%type <s> tBOOST | |||
%type <q> searchBase | |||
@@ -127,7 +128,15 @@ tSTRING tCOLON tSTRING tTILDE { | |||
tNUMBER { | |||
str := $1 | |||
logDebugGrammar("STRING - %s", str) | |||
q := NewMatchQuery(str) | |||
q1 := NewMatchQuery(str) | |||
val, err := strconv.ParseFloat($1, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
inclusive := true | |||
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive) | |||
q := NewDisjunctionQuery([]Query{q1,q2}) | |||
q.queryStringMode = true | |||
$$ = q | |||
} | |||
| | |||
@@ -154,12 +163,21 @@ tSTRING tCOLON tSTRING { | |||
$$ = q | |||
} | |||
| | |||
tSTRING tCOLON tNUMBER { | |||
tSTRING tCOLON posOrNegNumber { | |||
field := $1 | |||
str := $3 | |||
logDebugGrammar("FIELD - %s STRING - %s", field, str) | |||
q := NewMatchQuery(str) | |||
q.SetField(field) | |||
q1 := NewMatchQuery(str) | |||
q1.SetField(field) | |||
val, err := strconv.ParseFloat($3, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
inclusive := true | |||
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive) | |||
q2.SetField(field) | |||
q := NewDisjunctionQuery([]Query{q1,q2}) | |||
q.queryStringMode = true | |||
$$ = q | |||
} | |||
| | |||
@@ -172,9 +190,12 @@ tSTRING tCOLON tPHRASE { | |||
$$ = q | |||
} | |||
| | |||
tSTRING tCOLON tGREATER tNUMBER { | |||
tSTRING tCOLON tGREATER posOrNegNumber { | |||
field := $1 | |||
min, _ := strconv.ParseFloat($4, 64) | |||
min, err := strconv.ParseFloat($4, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
minInclusive := false | |||
logDebugGrammar("FIELD - GREATER THAN %f", min) | |||
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) | |||
@@ -182,9 +203,12 @@ tSTRING tCOLON tGREATER tNUMBER { | |||
$$ = q | |||
} | |||
| | |||
tSTRING tCOLON tGREATER tEQUAL tNUMBER { | |||
tSTRING tCOLON tGREATER tEQUAL posOrNegNumber { | |||
field := $1 | |||
min, _ := strconv.ParseFloat($5, 64) | |||
min, err := strconv.ParseFloat($5, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
minInclusive := true | |||
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min) | |||
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) | |||
@@ -192,9 +216,12 @@ tSTRING tCOLON tGREATER tEQUAL tNUMBER { | |||
$$ = q | |||
} | |||
| | |||
tSTRING tCOLON tLESS tNUMBER { | |||
tSTRING tCOLON tLESS posOrNegNumber { | |||
field := $1 | |||
max, _ := strconv.ParseFloat($4, 64) | |||
max, err := strconv.ParseFloat($4, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
maxInclusive := false | |||
logDebugGrammar("FIELD - LESS THAN %f", max) | |||
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) | |||
@@ -202,9 +229,12 @@ tSTRING tCOLON tLESS tNUMBER { | |||
$$ = q | |||
} | |||
| | |||
tSTRING tCOLON tLESS tEQUAL tNUMBER { | |||
tSTRING tCOLON tLESS tEQUAL posOrNegNumber { | |||
field := $1 | |||
max, _ := strconv.ParseFloat($5, 64) | |||
max, err := strconv.ParseFloat($5, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
maxInclusive := true | |||
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max) | |||
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) | |||
@@ -287,3 +317,12 @@ tBOOST { | |||
} | |||
logDebugGrammar("BOOST %f", boost) | |||
}; | |||
posOrNegNumber: | |||
tNUMBER { | |||
$$ = $1 | |||
} | |||
| | |||
tMINUS tNUMBER { | |||
$$ = "-" + $2 | |||
}; |
@@ -70,57 +70,58 @@ var yyExca = [...]int{ | |||
-2, 5, | |||
} | |||
const yyNprod = 26 | |||
const yyNprod = 28 | |||
const yyPrivate = 57344 | |||
var yyTokenNames []string | |||
var yyStates []string | |||
const yyLast = 31 | |||
const yyLast = 42 | |||
var yyAct = [...]int{ | |||
16, 18, 21, 13, 27, 24, 17, 19, 20, 25, | |||
22, 15, 26, 23, 9, 11, 31, 14, 29, 3, | |||
10, 30, 2, 28, 5, 6, 7, 1, 4, 12, | |||
8, | |||
17, 16, 18, 23, 22, 30, 3, 21, 19, 20, | |||
29, 26, 22, 22, 1, 21, 21, 15, 28, 25, | |||
24, 27, 34, 14, 22, 13, 31, 21, 32, 33, | |||
22, 9, 11, 21, 5, 6, 2, 10, 4, 12, | |||
7, 8, | |||
} | |||
var yyPact = [...]int{ | |||
18, -1000, -1000, 18, 10, -1000, -1000, -1000, -6, 3, | |||
-1000, -1000, -1000, -1000, -1000, -4, -12, -1000, -1000, 0, | |||
-1, -1000, -1000, 13, -1000, -1000, 11, -1000, -1000, -1000, | |||
-1000, -1000, | |||
28, -1000, -1000, 28, 27, -1000, -1000, -1000, 16, 9, | |||
-1000, -1000, -1000, -1000, -1000, -3, -11, -1000, -1000, 6, | |||
5, -1000, -5, -1000, -1000, 23, -1000, -1000, 17, -1000, | |||
-1000, -1000, -1000, -1000, -1000, | |||
} | |||
var yyPgo = [...]int{ | |||
0, 30, 29, 28, 27, 22, 19, | |||
0, 0, 41, 39, 38, 14, 36, 6, | |||
} | |||
var yyR1 = [...]int{ | |||
0, 4, 5, 5, 6, 3, 3, 3, 1, 1, | |||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |||
1, 1, 1, 1, 2, 2, | |||
0, 5, 6, 6, 7, 4, 4, 4, 2, 2, | |||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |||
2, 2, 2, 2, 3, 3, 1, 1, | |||
} | |||
var yyR2 = [...]int{ | |||
0, 1, 2, 1, 3, 0, 1, 1, 1, 2, | |||
4, 1, 1, 3, 3, 3, 4, 5, 4, 5, | |||
4, 5, 4, 5, 0, 1, | |||
4, 5, 4, 5, 0, 1, 1, 2, | |||
} | |||
var yyChk = [...]int{ | |||
-1000, -4, -5, -6, -3, 6, 7, -5, -1, 4, | |||
10, 5, -2, 9, 14, 8, 4, 10, 5, 11, | |||
12, 14, 10, 13, 5, 10, 13, 5, 10, 5, | |||
10, 5, | |||
-1000, -5, -6, -7, -4, 6, 7, -6, -2, 4, | |||
10, 5, -3, 9, 14, 8, 4, -1, 5, 11, | |||
12, 10, 7, 14, -1, 13, 5, -1, 13, 5, | |||
10, -1, 5, -1, 5, | |||
} | |||
var yyDef = [...]int{ | |||
5, -2, 1, -2, 0, 6, 7, 2, 24, 8, | |||
11, 12, 4, 25, 9, 0, 13, 14, 15, 0, | |||
0, 10, 16, 0, 20, 18, 0, 22, 17, 21, | |||
19, 23, | |||
0, 26, 0, 10, 16, 0, 20, 18, 0, 22, | |||
27, 17, 21, 19, 23, | |||
} | |||
var yyTok1 = [...]int{ | |||
@@ -474,25 +475,25 @@ yydefault: | |||
case 1: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:39 | |||
//line query_string.y:40 | |||
{ | |||
logDebugGrammar("INPUT") | |||
} | |||
case 2: | |||
yyDollar = yyS[yypt-2 : yypt+1] | |||
//line query_string.y:44 | |||
//line query_string.y:45 | |||
{ | |||
logDebugGrammar("SEARCH PARTS") | |||
} | |||
case 3: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:48 | |||
//line query_string.y:49 | |||
{ | |||
logDebugGrammar("SEARCH PART") | |||
} | |||
case 4: | |||
yyDollar = yyS[yypt-3 : yypt+1] | |||
//line query_string.y:53 | |||
//line query_string.y:54 | |||
{ | |||
query := yyDollar[2].q | |||
if yyDollar[3].pf != nil { | |||
@@ -511,27 +512,27 @@ yydefault: | |||
} | |||
case 5: | |||
yyDollar = yyS[yypt-0 : yypt+1] | |||
//line query_string.y:72 | |||
//line query_string.y:73 | |||
{ | |||
yyVAL.n = queryShould | |||
} | |||
case 6: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:76 | |||
//line query_string.y:77 | |||
{ | |||
logDebugGrammar("PLUS") | |||
yyVAL.n = queryMust | |||
} | |||
case 7: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:81 | |||
//line query_string.y:82 | |||
{ | |||
logDebugGrammar("MINUS") | |||
yyVAL.n = queryMustNot | |||
} | |||
case 8: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:87 | |||
//line query_string.y:88 | |||
{ | |||
str := yyDollar[1].s | |||
logDebugGrammar("STRING - %s", str) | |||
@@ -547,7 +548,7 @@ yydefault: | |||
} | |||
case 9: | |||
yyDollar = yyS[yypt-2 : yypt+1] | |||
//line query_string.y:101 | |||
//line query_string.y:102 | |||
{ | |||
str := yyDollar[1].s | |||
fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64) | |||
@@ -561,7 +562,7 @@ yydefault: | |||
} | |||
case 10: | |||
yyDollar = yyS[yypt-4 : yypt+1] | |||
//line query_string.y:113 | |||
//line query_string.y:114 | |||
{ | |||
field := yyDollar[1].s | |||
str := yyDollar[3].s | |||
@@ -577,16 +578,24 @@ yydefault: | |||
} | |||
case 11: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:127 | |||
//line query_string.y:128 | |||
{ | |||
str := yyDollar[1].s | |||
logDebugGrammar("STRING - %s", str) | |||
q := NewMatchQuery(str) | |||
q1 := NewMatchQuery(str) | |||
val, err := strconv.ParseFloat(yyDollar[1].s, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
inclusive := true | |||
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive) | |||
q := NewDisjunctionQuery([]Query{q1, q2}) | |||
q.queryStringMode = true | |||
yyVAL.q = q | |||
} | |||
case 12: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:134 | |||
//line query_string.y:143 | |||
{ | |||
phrase := yyDollar[1].s | |||
logDebugGrammar("PHRASE - %s", phrase) | |||
@@ -595,7 +604,7 @@ yydefault: | |||
} | |||
case 13: | |||
yyDollar = yyS[yypt-3 : yypt+1] | |||
//line query_string.y:141 | |||
//line query_string.y:150 | |||
{ | |||
field := yyDollar[1].s | |||
str := yyDollar[3].s | |||
@@ -613,18 +622,27 @@ yydefault: | |||
} | |||
case 14: | |||
yyDollar = yyS[yypt-3 : yypt+1] | |||
//line query_string.y:157 | |||
//line query_string.y:166 | |||
{ | |||
field := yyDollar[1].s | |||
str := yyDollar[3].s | |||
logDebugGrammar("FIELD - %s STRING - %s", field, str) | |||
q := NewMatchQuery(str) | |||
q.SetField(field) | |||
q1 := NewMatchQuery(str) | |||
q1.SetField(field) | |||
val, err := strconv.ParseFloat(yyDollar[3].s, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
inclusive := true | |||
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive) | |||
q2.SetField(field) | |||
q := NewDisjunctionQuery([]Query{q1, q2}) | |||
q.queryStringMode = true | |||
yyVAL.q = q | |||
} | |||
case 15: | |||
yyDollar = yyS[yypt-3 : yypt+1] | |||
//line query_string.y:166 | |||
//line query_string.y:184 | |||
{ | |||
field := yyDollar[1].s | |||
phrase := yyDollar[3].s | |||
@@ -635,10 +653,13 @@ yydefault: | |||
} | |||
case 16: | |||
yyDollar = yyS[yypt-4 : yypt+1] | |||
//line query_string.y:175 | |||
//line query_string.y:193 | |||
{ | |||
field := yyDollar[1].s | |||
min, _ := strconv.ParseFloat(yyDollar[4].s, 64) | |||
min, err := strconv.ParseFloat(yyDollar[4].s, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
minInclusive := false | |||
logDebugGrammar("FIELD - GREATER THAN %f", min) | |||
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) | |||
@@ -647,10 +668,13 @@ yydefault: | |||
} | |||
case 17: | |||
yyDollar = yyS[yypt-5 : yypt+1] | |||
//line query_string.y:185 | |||
//line query_string.y:206 | |||
{ | |||
field := yyDollar[1].s | |||
min, _ := strconv.ParseFloat(yyDollar[5].s, 64) | |||
min, err := strconv.ParseFloat(yyDollar[5].s, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
minInclusive := true | |||
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min) | |||
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) | |||
@@ -659,10 +683,13 @@ yydefault: | |||
} | |||
case 18: | |||
yyDollar = yyS[yypt-4 : yypt+1] | |||
//line query_string.y:195 | |||
//line query_string.y:219 | |||
{ | |||
field := yyDollar[1].s | |||
max, _ := strconv.ParseFloat(yyDollar[4].s, 64) | |||
max, err := strconv.ParseFloat(yyDollar[4].s, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
maxInclusive := false | |||
logDebugGrammar("FIELD - LESS THAN %f", max) | |||
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) | |||
@@ -671,10 +698,13 @@ yydefault: | |||
} | |||
case 19: | |||
yyDollar = yyS[yypt-5 : yypt+1] | |||
//line query_string.y:205 | |||
//line query_string.y:232 | |||
{ | |||
field := yyDollar[1].s | |||
max, _ := strconv.ParseFloat(yyDollar[5].s, 64) | |||
max, err := strconv.ParseFloat(yyDollar[5].s, 64) | |||
if err != nil { | |||
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err)) | |||
} | |||
maxInclusive := true | |||
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max) | |||
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) | |||
@@ -683,7 +713,7 @@ yydefault: | |||
} | |||
case 20: | |||
yyDollar = yyS[yypt-4 : yypt+1] | |||
//line query_string.y:215 | |||
//line query_string.y:245 | |||
{ | |||
field := yyDollar[1].s | |||
minInclusive := false | |||
@@ -700,7 +730,7 @@ yydefault: | |||
} | |||
case 21: | |||
yyDollar = yyS[yypt-5 : yypt+1] | |||
//line query_string.y:230 | |||
//line query_string.y:260 | |||
{ | |||
field := yyDollar[1].s | |||
minInclusive := true | |||
@@ -717,7 +747,7 @@ yydefault: | |||
} | |||
case 22: | |||
yyDollar = yyS[yypt-4 : yypt+1] | |||
//line query_string.y:245 | |||
//line query_string.y:275 | |||
{ | |||
field := yyDollar[1].s | |||
maxInclusive := false | |||
@@ -734,7 +764,7 @@ yydefault: | |||
} | |||
case 23: | |||
yyDollar = yyS[yypt-5 : yypt+1] | |||
//line query_string.y:260 | |||
//line query_string.y:290 | |||
{ | |||
field := yyDollar[1].s | |||
maxInclusive := true | |||
@@ -751,13 +781,13 @@ yydefault: | |||
} | |||
case 24: | |||
yyDollar = yyS[yypt-0 : yypt+1] | |||
//line query_string.y:276 | |||
//line query_string.y:306 | |||
{ | |||
yyVAL.pf = nil | |||
} | |||
case 25: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:280 | |||
//line query_string.y:310 | |||
{ | |||
yyVAL.pf = nil | |||
boost, err := strconv.ParseFloat(yyDollar[1].s, 64) | |||
@@ -768,6 +798,18 @@ yydefault: | |||
} | |||
logDebugGrammar("BOOST %f", boost) | |||
} | |||
case 26: | |||
yyDollar = yyS[yypt-1 : yypt+1] | |||
//line query_string.y:322 | |||
{ | |||
yyVAL.s = yyDollar[1].s | |||
} | |||
case 27: | |||
yyDollar = yyS[yypt-2 : yypt+1] | |||
//line query_string.y:326 | |||
{ | |||
yyVAL.s = "-" + yyDollar[2].s | |||
} | |||
} | |||
goto yystack /* stack new state and value */ | |||
} |
@@ -12,7 +12,10 @@ | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
//go:generate go tool yacc -o query_string.y.go query_string.y | |||
// as of Go 1.8 this requires the goyacc external tool | |||
// available from golang.org/x/tools/cmd/goyacc | |||
//go:generate goyacc -o query_string.y.go query_string.y | |||
//go:generate sed -i.tmp -e 1d query_string.y.go | |||
//go:generate rm query_string.y.go.tmp | |||
@@ -31,6 +34,9 @@ var debugParser bool | |||
var debugLexer bool | |||
func parseQuerySyntax(query string) (rq Query, err error) { | |||
if query == "" { | |||
return NewMatchNoneQuery(), nil | |||
} | |||
lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query))) | |||
doParse(lex) | |||
@@ -66,7 +72,7 @@ type lexerWrapper struct { | |||
func newLexerWrapper(lex yyLexer) *lexerWrapper { | |||
return &lexerWrapper{ | |||
lex: lex, | |||
query: NewBooleanQuery(nil, nil, nil), | |||
query: NewBooleanQueryForQueryString(nil, nil, nil), | |||
} | |||
} | |||
@@ -33,7 +33,9 @@ type RegexpQuery struct { | |||
// NewRegexpQuery creates a new Query which finds | |||
// documents containing terms that match the | |||
// specified regular expression. | |||
// specified regular expression. The regexp pattern | |||
// SHOULD NOT include ^ or $ modifiers, the search | |||
// will only match entire terms even without them. | |||
func NewRegexpQuery(regexp string) *RegexpQuery { | |||
return &RegexpQuery{ | |||
Regexp: regexp, | |||
@@ -45,7 +47,7 @@ func (q *RegexpQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *RegexpQuery) Boost() float64{ | |||
func (q *RegexpQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -53,11 +55,11 @@ func (q *RegexpQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *RegexpQuery) Field() string{ | |||
func (q *RegexpQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
@@ -67,7 +69,7 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, expl | |||
return nil, err | |||
} | |||
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), explain) | |||
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *RegexpQuery) Validate() error { | |||
@@ -76,14 +78,14 @@ func (q *RegexpQuery) Validate() error { | |||
func (q *RegexpQuery) compile() error { | |||
if q.compiled == nil { | |||
// require that pattern be anchored to start and end of term | |||
// require that pattern NOT be anchored to start and end of term | |||
actualRegexp := q.Regexp | |||
if !strings.HasPrefix(actualRegexp, "^") { | |||
actualRegexp = "^" + actualRegexp | |||
} | |||
if !strings.HasSuffix(actualRegexp, "$") { | |||
actualRegexp = actualRegexp + "$" | |||
if strings.HasPrefix(actualRegexp, "^") { | |||
actualRegexp = actualRegexp[1:] // remove leading ^ | |||
} | |||
// do not attempt to remove trailing $, it's presence is not | |||
// known to interfere with LiteralPrefix() the way ^ does | |||
// and removing $ introduces possible ambiguities with escaped \$, \\$, etc | |||
var err error | |||
q.compiled, err = regexp.Compile(actualRegexp) | |||
if err != nil { |
@@ -40,7 +40,7 @@ func (q *TermQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *TermQuery) Boost() float64{ | |||
func (q *TermQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -48,14 +48,14 @@ func (q *TermQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *TermQuery) Field() string{ | |||
func (q *TermQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), explain) | |||
return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), options) | |||
} |
@@ -0,0 +1,95 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package query | |||
import ( | |||
"fmt" | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/mapping" | |||
"github.com/blevesearch/bleve/search" | |||
"github.com/blevesearch/bleve/search/searcher" | |||
) | |||
type TermRangeQuery struct { | |||
Min string `json:"min,omitempty"` | |||
Max string `json:"max,omitempty"` | |||
InclusiveMin *bool `json:"inclusive_min,omitempty"` | |||
InclusiveMax *bool `json:"inclusive_max,omitempty"` | |||
FieldVal string `json:"field,omitempty"` | |||
BoostVal *Boost `json:"boost,omitempty"` | |||
} | |||
// NewTermRangeQuery creates a new Query for ranges | |||
// of text term values. | |||
// Either, but not both endpoints can be nil. | |||
// The minimum value is inclusive. | |||
// The maximum value is exclusive. | |||
func NewTermRangeQuery(min, max string) *TermRangeQuery { | |||
return NewTermRangeInclusiveQuery(min, max, nil, nil) | |||
} | |||
// NewTermRangeInclusiveQuery creates a new Query for ranges | |||
// of numeric values. | |||
// Either, but not both endpoints can be nil. | |||
// Control endpoint inclusion with inclusiveMin, inclusiveMax. | |||
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery { | |||
return &TermRangeQuery{ | |||
Min: min, | |||
Max: max, | |||
InclusiveMin: minInclusive, | |||
InclusiveMax: maxInclusive, | |||
} | |||
} | |||
func (q *TermRangeQuery) SetBoost(b float64) { | |||
boost := Boost(b) | |||
q.BoostVal = &boost | |||
} | |||
func (q *TermRangeQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
func (q *TermRangeQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *TermRangeQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
} | |||
var minTerm []byte | |||
if q.Min != "" { | |||
minTerm = []byte(q.Min) | |||
} | |||
var maxTerm []byte | |||
if q.Max != "" { | |||
maxTerm = []byte(q.Max) | |||
} | |||
return searcher.NewTermRangeSearcher(i, minTerm, maxTerm, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *TermRangeQuery) Validate() error { | |||
if q.Min == "" && q.Min == q.Max { | |||
return fmt.Errorf("term range query must specify min or max") | |||
} | |||
return nil | |||
} |
@@ -66,7 +66,7 @@ func (q *WildcardQuery) SetBoost(b float64) { | |||
q.BoostVal = &boost | |||
} | |||
func (q *WildcardQuery) Boost() float64{ | |||
func (q *WildcardQuery) Boost() float64 { | |||
return q.BoostVal.Value() | |||
} | |||
@@ -74,11 +74,11 @@ func (q *WildcardQuery) SetField(f string) { | |||
q.FieldVal = f | |||
} | |||
func (q *WildcardQuery) Field() string{ | |||
func (q *WildcardQuery) Field() string { | |||
return q.FieldVal | |||
} | |||
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { | |||
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | |||
field := q.FieldVal | |||
if q.FieldVal == "" { | |||
field = m.DefaultSearchField() | |||
@@ -91,7 +91,7 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, ex | |||
} | |||
} | |||
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), explain) | |||
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options) | |||
} | |||
func (q *WildcardQuery) Validate() error { | |||
@@ -101,6 +101,6 @@ func (q *WildcardQuery) Validate() error { | |||
} | |||
func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) { | |||
regexpString := "^" + wildcardRegexpReplacer.Replace(q.Wildcard) + "$" | |||
regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) | |||
return regexp.Compile(regexpString) | |||
} |
@@ -19,26 +19,26 @@ import ( | |||
) | |||
type ConjunctionQueryScorer struct { | |||
explain bool | |||
options search.SearcherOptions | |||
} | |||
func NewConjunctionQueryScorer(explain bool) *ConjunctionQueryScorer { | |||
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer { | |||
return &ConjunctionQueryScorer{ | |||
explain: explain, | |||
options: options, | |||
} | |||
} | |||
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch { | |||
var sum float64 | |||
var childrenExplanations []*search.Explanation | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations = make([]*search.Explanation, len(constituents)) | |||
} | |||
locations := []search.FieldTermLocationMap{} | |||
for i, docMatch := range constituents { | |||
sum += docMatch.Score | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations[i] = docMatch.Expl | |||
} | |||
if docMatch.Locations != nil { | |||
@@ -47,7 +47,7 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ | |||
} | |||
newScore := sum | |||
var newExpl *search.Explanation | |||
if s.explain { | |||
if s.options.Explain { | |||
newExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations} | |||
} | |||
@@ -24,15 +24,15 @@ import ( | |||
type ConstantScorer struct { | |||
constant float64 | |||
boost float64 | |||
explain bool | |||
options search.SearcherOptions | |||
queryNorm float64 | |||
queryWeight float64 | |||
queryWeightExplanation *search.Explanation | |||
} | |||
func NewConstantScorer(constant float64, boost float64, explain bool) *ConstantScorer { | |||
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer { | |||
rv := ConstantScorer{ | |||
explain: explain, | |||
options: options, | |||
queryWeight: 1.0, | |||
constant: constant, | |||
boost: boost, | |||
@@ -52,7 +52,7 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) { | |||
// update the query weight | |||
s.queryWeight = s.boost * s.queryNorm | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations := make([]*search.Explanation, 2) | |||
childrenExplanations[0] = &search.Explanation{ | |||
Value: s.boost, | |||
@@ -75,7 +75,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal | |||
score := s.constant | |||
if s.explain { | |||
if s.options.Explain { | |||
scoreExplanation = &search.Explanation{ | |||
Value: score, | |||
Message: fmt.Sprintf("ConstantScore()"), | |||
@@ -85,7 +85,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal | |||
// if the query weight isn't 1, multiply | |||
if s.queryWeight != 1.0 { | |||
score = score * s.queryWeight | |||
if s.explain { | |||
if s.options.Explain { | |||
childExplanations := make([]*search.Explanation, 2) | |||
childExplanations[0] = s.queryWeightExplanation | |||
childExplanations[1] = scoreExplanation | |||
@@ -100,7 +100,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal | |||
rv := ctx.DocumentMatchPool.Get() | |||
rv.IndexInternalID = id | |||
rv.Score = score | |||
if s.explain { | |||
if s.options.Explain { | |||
rv.Expl = scoreExplanation | |||
} | |||
@@ -21,26 +21,26 @@ import ( | |||
) | |||
type DisjunctionQueryScorer struct { | |||
explain bool | |||
options search.SearcherOptions | |||
} | |||
func NewDisjunctionQueryScorer(explain bool) *DisjunctionQueryScorer { | |||
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer { | |||
return &DisjunctionQueryScorer{ | |||
explain: explain, | |||
options: options, | |||
} | |||
} | |||
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch { | |||
var sum float64 | |||
var childrenExplanations []*search.Explanation | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations = make([]*search.Explanation, len(constituents)) | |||
} | |||
var locations []search.FieldTermLocationMap | |||
for i, docMatch := range constituents { | |||
sum += docMatch.Score | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations[i] = docMatch.Expl | |||
} | |||
if docMatch.Locations != nil { | |||
@@ -49,14 +49,14 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ | |||
} | |||
var rawExpl *search.Explanation | |||
if s.explain { | |||
if s.options.Explain { | |||
rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations} | |||
} | |||
coord := float64(countMatch) / float64(countTotal) | |||
newScore := sum * coord | |||
var newExpl *search.Explanation | |||
if s.explain { | |||
if s.options.Explain { | |||
ce := make([]*search.Explanation, 2) | |||
ce[0] = rawExpl | |||
ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)} |
@@ -23,20 +23,20 @@ import ( | |||
) | |||
type TermQueryScorer struct { | |||
queryTerm string | |||
queryTerm []byte | |||
queryField string | |||
queryBoost float64 | |||
docTerm uint64 | |||
docTotal uint64 | |||
idf float64 | |||
explain bool | |||
options search.SearcherOptions | |||
idfExplanation *search.Explanation | |||
queryNorm float64 | |||
queryWeight float64 | |||
queryWeightExplanation *search.Explanation | |||
} | |||
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer { | |||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { | |||
rv := TermQueryScorer{ | |||
queryTerm: queryTerm, | |||
queryField: queryField, | |||
@@ -44,11 +44,11 @@ func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, | |||
docTerm: docTerm, | |||
docTotal: docTotal, | |||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), | |||
explain: explain, | |||
options: options, | |||
queryWeight: 1.0, | |||
} | |||
if explain { | |||
if options.Explain { | |||
rv.idfExplanation = &search.Explanation{ | |||
Value: rv.idf, | |||
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal), | |||
@@ -69,7 +69,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { | |||
// update the query weight | |||
s.queryWeight = s.queryBoost * s.idf * s.queryNorm | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations := make([]*search.Explanation, 3) | |||
childrenExplanations[0] = &search.Explanation{ | |||
Value: s.queryBoost, | |||
@@ -100,7 +100,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||
} | |||
score := tf * termMatch.Norm * s.idf | |||
if s.explain { | |||
if s.options.Explain { | |||
childrenExplanations := make([]*search.Explanation, 3) | |||
childrenExplanations[0] = &search.Explanation{ | |||
Value: tf, | |||
@@ -121,7 +121,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||
// if the query weight isn't 1, multiply | |||
if s.queryWeight != 1.0 { | |||
score = score * s.queryWeight | |||
if s.explain { | |||
if s.options.Explain { | |||
childExplanations := make([]*search.Explanation, 2) | |||
childExplanations[0] = s.queryWeightExplanation | |||
childExplanations[1] = scoreExplanation | |||
@@ -136,44 +136,46 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||
rv := ctx.DocumentMatchPool.Get() | |||
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) | |||
rv.Score = score | |||
if s.explain { | |||
if s.options.Explain { | |||
rv.Expl = scoreExplanation | |||
} | |||
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 { | |||
locs := make([]search.Location, len(termMatch.Vectors)) | |||
locsUsed := 0 | |||
totalPositions := 0 | |||
for _, v := range termMatch.Vectors { | |||
totalPositions += len(v.ArrayPositions) | |||
} | |||
positions := make(search.ArrayPositions, totalPositions) | |||
positionsUsed := 0 | |||
rv.Locations = make(search.FieldTermLocationMap) | |||
for _, v := range termMatch.Vectors { | |||
tlm := rv.Locations[v.Field] | |||
if tlm == nil { | |||
tlm = make(search.TermLocationMap) | |||
rv.Locations[v.Field] = tlm | |||
} | |||
loc := search.Location{ | |||
Pos: float64(v.Pos), | |||
Start: float64(v.Start), | |||
End: float64(v.End), | |||
} | |||
loc := &locs[locsUsed] | |||
locsUsed++ | |||
loc.Pos = v.Pos | |||
loc.Start = v.Start | |||
loc.End = v.End | |||
if len(v.ArrayPositions) > 0 { | |||
loc.ArrayPositions = make([]float64, len(v.ArrayPositions)) | |||
loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)] | |||
for i, ap := range v.ArrayPositions { | |||
loc.ArrayPositions[i] = float64(ap) | |||
loc.ArrayPositions[i] = ap | |||
} | |||
positionsUsed += len(v.ArrayPositions) | |||
} | |||
locations := tlm[s.queryTerm] | |||
if locations == nil { | |||
locations = make(search.Locations, 1) | |||
locations[0] = &loc | |||
} else { | |||
locations = append(locations, &loc) | |||
} | |||
tlm[s.queryTerm] = locations | |||
rv.Locations[v.Field] = tlm | |||
tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc) | |||
} | |||
} | |||
return rv |
@@ -21,27 +21,32 @@ import ( | |||
"github.com/blevesearch/bleve/index" | |||
) | |||
type Location struct { | |||
Pos float64 `json:"pos"` | |||
Start float64 `json:"start"` | |||
End float64 `json:"end"` | |||
ArrayPositions []float64 `json:"array_positions"` | |||
} | |||
type ArrayPositions []uint64 | |||
// SameArrayElement returns true if two locations are point to | |||
// the same array element | |||
func (l *Location) SameArrayElement(other *Location) bool { | |||
if len(l.ArrayPositions) != len(other.ArrayPositions) { | |||
func (ap ArrayPositions) Equals(other ArrayPositions) bool { | |||
if len(ap) != len(other) { | |||
return false | |||
} | |||
for i, elem := range l.ArrayPositions { | |||
if other.ArrayPositions[i] != elem { | |||
for i := range ap { | |||
if ap[i] != other[i] { | |||
return false | |||
} | |||
} | |||
return true | |||
} | |||
type Location struct { | |||
// Pos is the position of the term within the field, starting at 1 | |||
Pos uint64 `json:"pos"` | |||
// Start and End are the byte offsets of the term in the field | |||
Start uint64 `json:"start"` | |||
End uint64 `json:"end"` | |||
// ArrayPositions contains the positions of the term within any elements. | |||
ArrayPositions ArrayPositions `json:"array_positions"` | |||
} | |||
type Locations []*Location | |||
type TermLocationMap map[string]Locations | |||
@@ -69,10 +74,6 @@ type DocumentMatch struct { | |||
// fields as float64s and date fields as time.RFC3339 formatted strings. | |||
Fields map[string]interface{} `json:"fields,omitempty"` | |||
// as we learn field terms, we can cache important ones for later use | |||
// for example, sorting and building facets need these values | |||
CachedFieldTerms index.FieldTerms `json:"-"` | |||
// if we load the document for this hit, remember it so we dont load again | |||
Document *document.Document `json:"-"` | |||
@@ -138,6 +139,11 @@ type Searcher interface { | |||
DocumentMatchPoolSize() int | |||
} | |||
type SearcherOptions struct { | |||
Explain bool | |||
IncludeTermVectors bool | |||
} | |||
// SearchContext represents the context around a single search | |||
type SearchContext struct { | |||
DocumentMatchPool *DocumentMatchPool |
@@ -38,14 +38,14 @@ type BooleanSearcher struct { | |||
initialized bool | |||
} | |||
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) { | |||
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { | |||
// build our searcher | |||
rv := BooleanSearcher{ | |||
indexReader: indexReader, | |||
mustSearcher: mustSearcher, | |||
shouldSearcher: shouldSearcher, | |||
mustNotSearcher: mustNotSearcher, | |||
scorer: scorer.NewConjunctionQueryScorer(explain), | |||
scorer: scorer.NewConjunctionQueryScorer(options), | |||
matches: make([]*search.DocumentMatch, 2), | |||
} | |||
rv.computeQueryNorm() |
@@ -31,10 +31,10 @@ type ConjunctionSearcher struct { | |||
maxIDIdx int | |||
scorer *scorer.ConjunctionQueryScorer | |||
initialized bool | |||
explain bool | |||
options search.SearcherOptions | |||
} | |||
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) { | |||
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) { | |||
// build the downstream searchers | |||
searchers := make(OrderedSearcherList, len(qsearchers)) | |||
for i, searcher := range qsearchers { | |||
@@ -45,10 +45,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S | |||
// build our searcher | |||
rv := ConjunctionSearcher{ | |||
indexReader: indexReader, | |||
explain: explain, | |||
options: options, | |||
searchers: searchers, | |||
currs: make([]*search.DocumentMatch, len(searchers)), | |||
scorer: scorer.NewConjunctionQueryScorer(explain), | |||
scorer: scorer.NewConjunctionQueryScorer(options), | |||
} | |||
rv.computeQueryNorm() | |||
return &rv, nil |
@@ -50,11 +50,22 @@ func tooManyClauses(count int) bool { | |||
} | |||
func tooManyClausesErr() error { | |||
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount) | |||
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", | |||
DisjunctionMaxClauseCount) | |||
} | |||
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) { | |||
if tooManyClauses(len(qsearchers)) { | |||
func NewDisjunctionSearcher(indexReader index.IndexReader, | |||
qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( | |||
*DisjunctionSearcher, error) { | |||
return newDisjunctionSearcher(indexReader, qsearchers, min, options, | |||
true) | |||
} | |||
func newDisjunctionSearcher(indexReader index.IndexReader, | |||
qsearchers []search.Searcher, min float64, options search.SearcherOptions, | |||
limit bool) ( | |||
*DisjunctionSearcher, error) { | |||
if limit && tooManyClauses(len(qsearchers)) { | |||
return nil, tooManyClausesErr() | |||
} | |||
// build the downstream searchers | |||
@@ -70,7 +81,7 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S | |||
searchers: searchers, | |||
numSearchers: len(searchers), | |||
currs: make([]*search.DocumentMatch, len(searchers)), | |||
scorer: scorer.NewDisjunctionQueryScorer(explain), | |||
scorer: scorer.NewDisjunctionQueryScorer(options), | |||
min: int(min), | |||
matching: make([]*search.DocumentMatch, len(searchers)), | |||
matchingIdxs: make([]int, len(searchers)), | |||
@@ -161,7 +172,8 @@ func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) { | |||
} | |||
} | |||
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | |||
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) ( | |||
*search.DocumentMatch, error) { | |||
if !s.initialized { | |||
err := s.initSearchers(ctx) | |||
if err != nil { | |||
@@ -199,7 +211,8 @@ func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM | |||
return rv, nil | |||
} | |||
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { | |||
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, | |||
ID index.IndexInternalID) (*search.DocumentMatch, error) { | |||
if !s.initialized { | |||
err := s.initSearchers(ctx) | |||
if err != nil { |
@@ -28,13 +28,13 @@ type DocIDSearcher struct { | |||
} | |||
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, | |||
explain bool) (searcher *DocIDSearcher, err error) { | |||
options search.SearcherOptions) (searcher *DocIDSearcher, err error) { | |||
reader, err := indexReader.DocIDReaderOnly(ids) | |||
if err != nil { | |||
return nil, err | |||
} | |||
scorer := scorer.NewConstantScorer(1.0, boost, explain) | |||
scorer := scorer.NewConstantScorer(1.0, boost, options) | |||
return &DocIDSearcher{ | |||
scorer: scorer, | |||
reader: reader, |
@@ -0,0 +1,88 @@ | |||
// Copyright (c) 2017 Couchbase, Inc. | |||
// | |||
// Licensed under the Apache License, Version 2.0 (the "License"); | |||
// you may not use this file except in compliance with the License. | |||
// You may obtain a copy of the License at | |||
// | |||
// http://www.apache.org/licenses/LICENSE-2.0 | |||
// | |||
// Unless required by applicable law or agreed to in writing, software | |||
// distributed under the License is distributed on an "AS IS" BASIS, | |||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
// See the License for the specific language governing permissions and | |||
// limitations under the License. | |||
package searcher | |||
import ( | |||
"github.com/blevesearch/bleve/index" | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
// FilterFunc defines a function which can filter documents | |||
// returning true means keep the document | |||
// returning false means do not keep the document | |||
type FilterFunc func(d *search.DocumentMatch) bool | |||
// FilteringSearcher wraps any other searcher, but checks any Next/Advance | |||
// call against the supplied FilterFunc | |||
type FilteringSearcher struct { | |||
child search.Searcher | |||
accept FilterFunc | |||
} | |||
func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher { | |||
return &FilteringSearcher{ | |||
child: s, | |||
accept: filter, | |||
} | |||
} | |||
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | |||
next, err := f.child.Next(ctx) | |||
for next != nil && err == nil { | |||
if f.accept(next) { | |||
return next, nil | |||
} | |||
next, err = f.child.Next(ctx) | |||
} | |||
return nil, err | |||
} | |||
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { | |||
adv, err := f.child.Advance(ctx, ID) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if adv == nil { | |||
return nil, nil | |||
} | |||
if f.accept(adv) { | |||
return adv, nil | |||
} | |||
return f.Next(ctx) | |||
} | |||
func (f *FilteringSearcher) Close() error { | |||
return f.child.Close() | |||
} | |||
func (f *FilteringSearcher) Weight() float64 { | |||
return f.child.Weight() | |||
} | |||
func (f *FilteringSearcher) SetQueryNorm(n float64) { | |||
f.child.SetQueryNorm(n) | |||
} | |||
func (f *FilteringSearcher) Count() uint64 { | |||
return f.child.Count() | |||
} | |||
func (f *FilteringSearcher) Min() int { | |||
return f.child.Min() | |||
} | |||
func (f *FilteringSearcher) DocumentMatchPoolSize() int { | |||
return f.child.DocumentMatchPoolSize() | |||
} |
@@ -19,17 +19,9 @@ import ( | |||
"github.com/blevesearch/bleve/search" | |||
) | |||
type FuzzySearcher struct { | |||
indexReader index.IndexReader | |||
term string | |||
prefix int | |||
fuzziness int | |||
field string | |||
explain bool | |||
searcher *DisjunctionSearcher | |||
} | |||
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) { | |||
func NewFuzzySearcher(indexReader index.IndexReader, term string, | |||
prefix, fuzziness int, field string, boost float64, | |||
options search.SearcherOptions) (search.Searcher, error) { | |||
// Note: we don't byte slice the term for a prefix because of runes. | |||
prefixTerm := "" | |||
for i, r := range term { | |||
@@ -40,46 +32,18 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin | |||
} | |||
} | |||
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm) | |||
if err != nil { | |||
return nil, err | |||
} | |||
// enumerate all the terms in the range | |||
qsearchers := make([]search.Searcher, 0, len(candidateTerms)) | |||
qsearchersClose := func() { | |||
for _, searcher := range qsearchers { | |||
_ = searcher.Close() | |||
} | |||
} | |||
for _, cterm := range candidateTerms { | |||
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain) | |||
if err != nil { | |||
qsearchersClose() | |||
return nil, err | |||
} | |||
qsearchers = append(qsearchers, qsearcher) | |||
} | |||
// build disjunction searcher of these ranges | |||
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain) | |||
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, | |||
field, prefixTerm) | |||
if err != nil { | |||
qsearchersClose() | |||
return nil, err | |||
} | |||
return &FuzzySearcher{ | |||
indexReader: indexReader, | |||
term: term, | |||
prefix: prefix, | |||
fuzziness: fuzziness, | |||
field: field, | |||
explain: explain, | |||
searcher: searcher, | |||
}, nil | |||
return NewMultiTermSearcher(indexReader, candidateTerms, field, | |||
boost, options, true) | |||
} | |||
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) { | |||
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, | |||
fuzziness int, field, prefixTerm string) (rv []string, err error) { | |||
rv = make([]string, 0) | |||
var fieldDict index.FieldDict | |||
if len(prefixTerm) > 0 { | |||
@@ -108,36 +72,3 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzzine | |||
return rv, err | |||
} | |||
func (s *FuzzySearcher) Count() uint64 { | |||
return s.searcher.Count() | |||
} | |||
func (s *FuzzySearcher) Weight() float64 { | |||
return s.searcher.Weight() | |||
} | |||
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) { | |||
s.searcher.SetQueryNorm(qnorm) | |||
} | |||
func (s *FuzzySearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | |||
return s.searcher.Next(ctx) | |||
} | |||
func (s *FuzzySearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { | |||
return s.searcher.Advance(ctx, ID) | |||
} | |||
func (s *FuzzySearcher) Close() error { | |||
return s.searcher.Close() | |||
} | |||
func (s *FuzzySearcher) Min() int { | |||
return 0 | |||
} | |||
func (s *FuzzySearcher) DocumentMatchPoolSize() int { | |||
return s.searcher.DocumentMatchPoolSize() | |||
} |